{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7015341949963622, "eval_steps": 500, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.769177437454527e-05, "grad_norm": 7.59375, "learning_rate": 3e-06, "loss": 4.1911, "step": 1 }, { "epoch": 0.00017538354874909053, "grad_norm": 7.6875, "learning_rate": 6e-06, "loss": 4.1743, "step": 2 }, { "epoch": 0.0002630753231236358, "grad_norm": 8.1875, "learning_rate": 9e-06, "loss": 4.1165, "step": 3 }, { "epoch": 0.00035076709749818106, "grad_norm": 8.0, "learning_rate": 1.2e-05, "loss": 4.2652, "step": 4 }, { "epoch": 0.00043845887187272633, "grad_norm": 7.9375, "learning_rate": 1.5e-05, "loss": 4.1691, "step": 5 }, { "epoch": 0.0005261506462472716, "grad_norm": 8.0625, "learning_rate": 1.8e-05, "loss": 4.2089, "step": 6 }, { "epoch": 0.0006138424206218169, "grad_norm": 8.25, "learning_rate": 2.1000000000000002e-05, "loss": 4.1677, "step": 7 }, { "epoch": 0.0007015341949963621, "grad_norm": 7.8125, "learning_rate": 2.4e-05, "loss": 4.1564, "step": 8 }, { "epoch": 0.0007892259693709074, "grad_norm": 7.75, "learning_rate": 2.7e-05, "loss": 4.1722, "step": 9 }, { "epoch": 0.0008769177437454527, "grad_norm": 7.3125, "learning_rate": 3e-05, "loss": 4.1554, "step": 10 }, { "epoch": 0.0009646095181199979, "grad_norm": 7.53125, "learning_rate": 3.2999999999999996e-05, "loss": 4.1511, "step": 11 }, { "epoch": 0.0010523012924945432, "grad_norm": 7.1875, "learning_rate": 3.6e-05, "loss": 4.152, "step": 12 }, { "epoch": 0.0011399930668690886, "grad_norm": 6.8125, "learning_rate": 3.9e-05, "loss": 4.1581, "step": 13 }, { "epoch": 0.0012276848412436337, "grad_norm": 6.625, "learning_rate": 4.2000000000000004e-05, "loss": 4.0485, "step": 14 }, { "epoch": 0.001315376615618179, "grad_norm": 6.28125, "learning_rate": 4.4999999999999996e-05, "loss": 4.083, "step": 15 }, { "epoch": 0.0014030683899927242, "grad_norm": 5.625, "learning_rate": 4.8e-05, "loss": 4.075, "step": 16 }, { "epoch": 0.0014907601643672696, "grad_norm": 5.625, "learning_rate": 5.1000000000000006e-05, "loss": 4.0571, "step": 17 }, { "epoch": 0.0015784519387418148, "grad_norm": 5.53125, "learning_rate": 5.4e-05, "loss": 4.0213, "step": 18 }, { "epoch": 0.0016661437131163602, "grad_norm": 4.5625, "learning_rate": 5.7e-05, "loss": 4.0436, "step": 19 }, { "epoch": 0.0017538354874909053, "grad_norm": 4.6875, "learning_rate": 6e-05, "loss": 4.0306, "step": 20 }, { "epoch": 0.0018415272618654507, "grad_norm": 4.125, "learning_rate": 6.3e-05, "loss": 4.0364, "step": 21 }, { "epoch": 0.0019292190362399958, "grad_norm": 3.875, "learning_rate": 6.599999999999999e-05, "loss": 3.9774, "step": 22 }, { "epoch": 0.002016910810614541, "grad_norm": 3.421875, "learning_rate": 6.9e-05, "loss": 3.9513, "step": 23 }, { "epoch": 0.0021046025849890864, "grad_norm": 3.21875, "learning_rate": 7.2e-05, "loss": 3.9453, "step": 24 }, { "epoch": 0.0021922943593636317, "grad_norm": 2.734375, "learning_rate": 7.500000000000001e-05, "loss": 3.9338, "step": 25 }, { "epoch": 0.002279986133738177, "grad_norm": 2.375, "learning_rate": 7.8e-05, "loss": 3.9318, "step": 26 }, { "epoch": 0.0023676779081127225, "grad_norm": 1.921875, "learning_rate": 8.1e-05, "loss": 3.8758, "step": 27 }, { "epoch": 0.0024553696824872674, "grad_norm": 1.828125, "learning_rate": 8.400000000000001e-05, "loss": 3.8885, "step": 28 }, { "epoch": 0.002543061456861813, "grad_norm": 1.765625, "learning_rate": 8.7e-05, "loss": 3.8412, "step": 29 }, { "epoch": 0.002630753231236358, "grad_norm": 1.65625, "learning_rate": 8.999999999999999e-05, "loss": 3.8114, "step": 30 }, { "epoch": 0.0027184450056109036, "grad_norm": 1.5234375, "learning_rate": 9.3e-05, "loss": 3.778, "step": 31 }, { "epoch": 0.0028061367799854485, "grad_norm": 1.515625, "learning_rate": 9.6e-05, "loss": 3.7355, "step": 32 }, { "epoch": 0.002893828554359994, "grad_norm": 1.5, "learning_rate": 9.900000000000001e-05, "loss": 3.7612, "step": 33 }, { "epoch": 0.0029815203287345392, "grad_norm": 1.390625, "learning_rate": 0.00010200000000000001, "loss": 3.7869, "step": 34 }, { "epoch": 0.0030692121031090846, "grad_norm": 1.328125, "learning_rate": 0.00010500000000000002, "loss": 3.6998, "step": 35 }, { "epoch": 0.0031569038774836296, "grad_norm": 1.3203125, "learning_rate": 0.000108, "loss": 3.645, "step": 36 }, { "epoch": 0.003244595651858175, "grad_norm": 1.265625, "learning_rate": 0.000111, "loss": 3.6778, "step": 37 }, { "epoch": 0.0033322874262327203, "grad_norm": 1.28125, "learning_rate": 0.000114, "loss": 3.6011, "step": 38 }, { "epoch": 0.0034199792006072657, "grad_norm": 1.28125, "learning_rate": 0.000117, "loss": 3.6147, "step": 39 }, { "epoch": 0.0035076709749818106, "grad_norm": 1.2265625, "learning_rate": 0.00012, "loss": 3.5427, "step": 40 }, { "epoch": 0.003595362749356356, "grad_norm": 1.1484375, "learning_rate": 0.000123, "loss": 3.5252, "step": 41 }, { "epoch": 0.0036830545237309014, "grad_norm": 1.140625, "learning_rate": 0.000126, "loss": 3.5946, "step": 42 }, { "epoch": 0.0037707462981054467, "grad_norm": 1.1328125, "learning_rate": 0.000129, "loss": 3.498, "step": 43 }, { "epoch": 0.0038584380724799917, "grad_norm": 1.4140625, "learning_rate": 0.00013199999999999998, "loss": 3.4045, "step": 44 }, { "epoch": 0.0039461298468545375, "grad_norm": 1.46875, "learning_rate": 0.000135, "loss": 3.4418, "step": 45 }, { "epoch": 0.004033821621229082, "grad_norm": 1.5390625, "learning_rate": 0.000138, "loss": 3.3624, "step": 46 }, { "epoch": 0.004121513395603627, "grad_norm": 1.6328125, "learning_rate": 0.000141, "loss": 3.3476, "step": 47 }, { "epoch": 0.004209205169978173, "grad_norm": 1.5078125, "learning_rate": 0.000144, "loss": 3.2416, "step": 48 }, { "epoch": 0.004296896944352718, "grad_norm": 1.171875, "learning_rate": 0.000147, "loss": 3.263, "step": 49 }, { "epoch": 0.0043845887187272635, "grad_norm": 1.1328125, "learning_rate": 0.00015000000000000001, "loss": 3.2391, "step": 50 }, { "epoch": 0.004472280493101809, "grad_norm": 0.8671875, "learning_rate": 0.000153, "loss": 3.1366, "step": 51 }, { "epoch": 0.004559972267476354, "grad_norm": 0.75390625, "learning_rate": 0.000156, "loss": 3.103, "step": 52 }, { "epoch": 0.0046476640418509, "grad_norm": 0.77734375, "learning_rate": 0.000159, "loss": 3.1938, "step": 53 }, { "epoch": 0.004735355816225445, "grad_norm": 0.69140625, "learning_rate": 0.000162, "loss": 3.1503, "step": 54 }, { "epoch": 0.0048230475905999895, "grad_norm": 0.70703125, "learning_rate": 0.000165, "loss": 3.1695, "step": 55 }, { "epoch": 0.004910739364974535, "grad_norm": 0.67578125, "learning_rate": 0.00016800000000000002, "loss": 3.0348, "step": 56 }, { "epoch": 0.00499843113934908, "grad_norm": 0.68359375, "learning_rate": 0.000171, "loss": 3.0016, "step": 57 }, { "epoch": 0.005086122913723626, "grad_norm": 0.640625, "learning_rate": 0.000174, "loss": 2.9536, "step": 58 }, { "epoch": 0.005173814688098171, "grad_norm": 0.59765625, "learning_rate": 0.000177, "loss": 2.9378, "step": 59 }, { "epoch": 0.005261506462472716, "grad_norm": 0.56640625, "learning_rate": 0.00017999999999999998, "loss": 2.9442, "step": 60 }, { "epoch": 0.005349198236847262, "grad_norm": 0.55078125, "learning_rate": 0.000183, "loss": 2.8868, "step": 61 }, { "epoch": 0.005436890011221807, "grad_norm": 0.55078125, "learning_rate": 0.000186, "loss": 2.8555, "step": 62 }, { "epoch": 0.005524581785596352, "grad_norm": 0.55078125, "learning_rate": 0.000189, "loss": 2.8699, "step": 63 }, { "epoch": 0.005612273559970897, "grad_norm": 0.59375, "learning_rate": 0.000192, "loss": 2.9004, "step": 64 }, { "epoch": 0.005699965334345442, "grad_norm": 0.625, "learning_rate": 0.00019500000000000002, "loss": 2.8415, "step": 65 }, { "epoch": 0.005787657108719988, "grad_norm": 0.5703125, "learning_rate": 0.00019800000000000002, "loss": 2.756, "step": 66 }, { "epoch": 0.005875348883094533, "grad_norm": 0.54296875, "learning_rate": 0.000201, "loss": 2.8199, "step": 67 }, { "epoch": 0.0059630406574690785, "grad_norm": 0.52734375, "learning_rate": 0.00020400000000000003, "loss": 2.753, "step": 68 }, { "epoch": 0.006050732431843624, "grad_norm": 0.46484375, "learning_rate": 0.00020700000000000002, "loss": 2.677, "step": 69 }, { "epoch": 0.006138424206218169, "grad_norm": 0.5546875, "learning_rate": 0.00021000000000000004, "loss": 2.7099, "step": 70 }, { "epoch": 0.006226115980592715, "grad_norm": 0.5859375, "learning_rate": 0.00021299999999999997, "loss": 2.6848, "step": 71 }, { "epoch": 0.006313807754967259, "grad_norm": 0.439453125, "learning_rate": 0.000216, "loss": 2.6327, "step": 72 }, { "epoch": 0.0064014995293418045, "grad_norm": 0.43359375, "learning_rate": 0.00021899999999999998, "loss": 2.6438, "step": 73 }, { "epoch": 0.00648919130371635, "grad_norm": 0.44921875, "learning_rate": 0.000222, "loss": 2.6331, "step": 74 }, { "epoch": 0.006576883078090895, "grad_norm": 0.4296875, "learning_rate": 0.000225, "loss": 2.616, "step": 75 }, { "epoch": 0.006664574852465441, "grad_norm": 0.3828125, "learning_rate": 0.000228, "loss": 2.4532, "step": 76 }, { "epoch": 0.006752266626839986, "grad_norm": 0.318359375, "learning_rate": 0.000231, "loss": 2.4668, "step": 77 }, { "epoch": 0.006839958401214531, "grad_norm": 0.3203125, "learning_rate": 0.000234, "loss": 2.4524, "step": 78 }, { "epoch": 0.006927650175589077, "grad_norm": 0.353515625, "learning_rate": 0.00023700000000000001, "loss": 2.4424, "step": 79 }, { "epoch": 0.007015341949963621, "grad_norm": 0.30859375, "learning_rate": 0.00024, "loss": 2.403, "step": 80 }, { "epoch": 0.007103033724338167, "grad_norm": 0.294921875, "learning_rate": 0.00024300000000000002, "loss": 2.4171, "step": 81 }, { "epoch": 0.007190725498712712, "grad_norm": 0.298828125, "learning_rate": 0.000246, "loss": 2.4317, "step": 82 }, { "epoch": 0.007278417273087257, "grad_norm": 0.259765625, "learning_rate": 0.00024900000000000004, "loss": 2.3678, "step": 83 }, { "epoch": 0.007366109047461803, "grad_norm": 0.341796875, "learning_rate": 0.000252, "loss": 2.3322, "step": 84 }, { "epoch": 0.007453800821836348, "grad_norm": 0.310546875, "learning_rate": 0.000255, "loss": 2.2875, "step": 85 }, { "epoch": 0.0075414925962108935, "grad_norm": 0.248046875, "learning_rate": 0.000258, "loss": 2.2755, "step": 86 }, { "epoch": 0.007629184370585439, "grad_norm": 0.2578125, "learning_rate": 0.000261, "loss": 2.245, "step": 87 }, { "epoch": 0.007716876144959983, "grad_norm": 0.220703125, "learning_rate": 0.00026399999999999997, "loss": 2.1879, "step": 88 }, { "epoch": 0.007804567919334529, "grad_norm": 0.279296875, "learning_rate": 0.000267, "loss": 2.263, "step": 89 }, { "epoch": 0.007892259693709075, "grad_norm": 0.240234375, "learning_rate": 0.00027, "loss": 2.2339, "step": 90 }, { "epoch": 0.00797995146808362, "grad_norm": 0.23828125, "learning_rate": 0.000273, "loss": 2.2315, "step": 91 }, { "epoch": 0.008067643242458164, "grad_norm": 0.2119140625, "learning_rate": 0.000276, "loss": 2.1785, "step": 92 }, { "epoch": 0.00815533501683271, "grad_norm": 0.216796875, "learning_rate": 0.000279, "loss": 2.1301, "step": 93 }, { "epoch": 0.008243026791207255, "grad_norm": 0.203125, "learning_rate": 0.000282, "loss": 2.1555, "step": 94 }, { "epoch": 0.008330718565581801, "grad_norm": 0.2734375, "learning_rate": 0.000285, "loss": 2.0664, "step": 95 }, { "epoch": 0.008418410339956345, "grad_norm": 0.212890625, "learning_rate": 0.000288, "loss": 2.0808, "step": 96 }, { "epoch": 0.008506102114330892, "grad_norm": 0.2060546875, "learning_rate": 0.000291, "loss": 1.9987, "step": 97 }, { "epoch": 0.008593793888705436, "grad_norm": 0.2177734375, "learning_rate": 0.000294, "loss": 2.0368, "step": 98 }, { "epoch": 0.008681485663079982, "grad_norm": 0.1953125, "learning_rate": 0.000297, "loss": 2.0081, "step": 99 }, { "epoch": 0.008769177437454527, "grad_norm": 0.28515625, "learning_rate": 0.00030000000000000003, "loss": 2.0451, "step": 100 }, { "epoch": 0.008856869211829071, "grad_norm": 0.1875, "learning_rate": 0.00030300000000000005, "loss": 1.9185, "step": 101 }, { "epoch": 0.008944560986203618, "grad_norm": 0.18359375, "learning_rate": 0.000306, "loss": 1.9602, "step": 102 }, { "epoch": 0.009032252760578162, "grad_norm": 0.197265625, "learning_rate": 0.000309, "loss": 1.9397, "step": 103 }, { "epoch": 0.009119944534952708, "grad_norm": 0.271484375, "learning_rate": 0.000312, "loss": 1.9686, "step": 104 }, { "epoch": 0.009207636309327253, "grad_norm": 0.2314453125, "learning_rate": 0.000315, "loss": 1.903, "step": 105 }, { "epoch": 0.0092953280837018, "grad_norm": 0.1923828125, "learning_rate": 0.000318, "loss": 1.9059, "step": 106 }, { "epoch": 0.009383019858076344, "grad_norm": 0.189453125, "learning_rate": 0.000321, "loss": 1.8646, "step": 107 }, { "epoch": 0.00947071163245089, "grad_norm": 0.1826171875, "learning_rate": 0.000324, "loss": 1.8592, "step": 108 }, { "epoch": 0.009558403406825434, "grad_norm": 0.2314453125, "learning_rate": 0.000327, "loss": 1.8891, "step": 109 }, { "epoch": 0.009646095181199979, "grad_norm": 0.1474609375, "learning_rate": 0.00033, "loss": 1.7582, "step": 110 }, { "epoch": 0.009733786955574525, "grad_norm": 0.1708984375, "learning_rate": 0.000333, "loss": 1.7959, "step": 111 }, { "epoch": 0.00982147872994907, "grad_norm": 0.2255859375, "learning_rate": 0.00033600000000000004, "loss": 1.8503, "step": 112 }, { "epoch": 0.009909170504323616, "grad_norm": 0.1640625, "learning_rate": 0.000339, "loss": 1.8528, "step": 113 }, { "epoch": 0.00999686227869816, "grad_norm": 0.1474609375, "learning_rate": 0.000342, "loss": 1.7961, "step": 114 }, { "epoch": 0.010084554053072707, "grad_norm": 0.1337890625, "learning_rate": 0.00034500000000000004, "loss": 1.7692, "step": 115 }, { "epoch": 0.010172245827447251, "grad_norm": 0.1376953125, "learning_rate": 0.000348, "loss": 1.781, "step": 116 }, { "epoch": 0.010259937601821796, "grad_norm": 0.1669921875, "learning_rate": 0.000351, "loss": 1.7401, "step": 117 }, { "epoch": 0.010347629376196342, "grad_norm": 0.1328125, "learning_rate": 0.000354, "loss": 1.7695, "step": 118 }, { "epoch": 0.010435321150570886, "grad_norm": 0.1337890625, "learning_rate": 0.000357, "loss": 1.744, "step": 119 }, { "epoch": 0.010523012924945433, "grad_norm": 0.166015625, "learning_rate": 0.00035999999999999997, "loss": 1.7182, "step": 120 }, { "epoch": 0.010610704699319977, "grad_norm": 0.1474609375, "learning_rate": 0.000363, "loss": 1.7426, "step": 121 }, { "epoch": 0.010698396473694523, "grad_norm": 0.1416015625, "learning_rate": 0.000366, "loss": 1.6804, "step": 122 }, { "epoch": 0.010786088248069068, "grad_norm": 0.12353515625, "learning_rate": 0.000369, "loss": 1.753, "step": 123 }, { "epoch": 0.010873780022443614, "grad_norm": 0.14453125, "learning_rate": 0.000372, "loss": 1.7363, "step": 124 }, { "epoch": 0.010961471796818159, "grad_norm": 0.138671875, "learning_rate": 0.000375, "loss": 1.7182, "step": 125 }, { "epoch": 0.011049163571192703, "grad_norm": 0.158203125, "learning_rate": 0.000378, "loss": 1.6622, "step": 126 }, { "epoch": 0.01113685534556725, "grad_norm": 0.1376953125, "learning_rate": 0.000381, "loss": 1.6381, "step": 127 }, { "epoch": 0.011224547119941794, "grad_norm": 0.1142578125, "learning_rate": 0.000384, "loss": 1.6721, "step": 128 }, { "epoch": 0.01131223889431634, "grad_norm": 0.1669921875, "learning_rate": 0.00038700000000000003, "loss": 1.6262, "step": 129 }, { "epoch": 0.011399930668690885, "grad_norm": 0.1376953125, "learning_rate": 0.00039000000000000005, "loss": 1.7461, "step": 130 }, { "epoch": 0.011487622443065431, "grad_norm": 0.2353515625, "learning_rate": 0.000393, "loss": 1.6759, "step": 131 }, { "epoch": 0.011575314217439975, "grad_norm": 0.208984375, "learning_rate": 0.00039600000000000003, "loss": 1.6878, "step": 132 }, { "epoch": 0.011663005991814522, "grad_norm": 0.29296875, "learning_rate": 0.00039900000000000005, "loss": 1.6794, "step": 133 }, { "epoch": 0.011750697766189066, "grad_norm": 0.2080078125, "learning_rate": 0.000402, "loss": 1.6118, "step": 134 }, { "epoch": 0.01183838954056361, "grad_norm": 0.1982421875, "learning_rate": 0.00040500000000000003, "loss": 1.6834, "step": 135 }, { "epoch": 0.011926081314938157, "grad_norm": 0.1474609375, "learning_rate": 0.00040800000000000005, "loss": 1.6754, "step": 136 }, { "epoch": 0.012013773089312701, "grad_norm": 0.1572265625, "learning_rate": 0.000411, "loss": 1.6149, "step": 137 }, { "epoch": 0.012101464863687248, "grad_norm": 0.10546875, "learning_rate": 0.00041400000000000003, "loss": 1.7056, "step": 138 }, { "epoch": 0.012189156638061792, "grad_norm": 0.1650390625, "learning_rate": 0.00041700000000000005, "loss": 1.6293, "step": 139 }, { "epoch": 0.012276848412436338, "grad_norm": 0.140625, "learning_rate": 0.00042000000000000007, "loss": 1.6483, "step": 140 }, { "epoch": 0.012364540186810883, "grad_norm": 0.1181640625, "learning_rate": 0.000423, "loss": 1.6171, "step": 141 }, { "epoch": 0.01245223196118543, "grad_norm": 0.12255859375, "learning_rate": 0.00042599999999999995, "loss": 1.6129, "step": 142 }, { "epoch": 0.012539923735559974, "grad_norm": 0.1337890625, "learning_rate": 0.00042899999999999997, "loss": 1.6066, "step": 143 }, { "epoch": 0.012627615509934518, "grad_norm": 0.154296875, "learning_rate": 0.000432, "loss": 1.5817, "step": 144 }, { "epoch": 0.012715307284309064, "grad_norm": 0.1162109375, "learning_rate": 0.000435, "loss": 1.666, "step": 145 }, { "epoch": 0.012802999058683609, "grad_norm": 0.1123046875, "learning_rate": 0.00043799999999999997, "loss": 1.6477, "step": 146 }, { "epoch": 0.012890690833058155, "grad_norm": 0.1533203125, "learning_rate": 0.000441, "loss": 1.5617, "step": 147 }, { "epoch": 0.0129783826074327, "grad_norm": 0.154296875, "learning_rate": 0.000444, "loss": 1.5307, "step": 148 }, { "epoch": 0.013066074381807246, "grad_norm": 0.09716796875, "learning_rate": 0.00044699999999999997, "loss": 1.5576, "step": 149 }, { "epoch": 0.01315376615618179, "grad_norm": 0.1533203125, "learning_rate": 0.00045, "loss": 1.5612, "step": 150 }, { "epoch": 0.013241457930556335, "grad_norm": 0.1064453125, "learning_rate": 0.000453, "loss": 1.5933, "step": 151 }, { "epoch": 0.013329149704930881, "grad_norm": 0.140625, "learning_rate": 0.000456, "loss": 1.5505, "step": 152 }, { "epoch": 0.013416841479305426, "grad_norm": 0.11376953125, "learning_rate": 0.000459, "loss": 1.4969, "step": 153 }, { "epoch": 0.013504533253679972, "grad_norm": 0.1318359375, "learning_rate": 0.000462, "loss": 1.5668, "step": 154 }, { "epoch": 0.013592225028054516, "grad_norm": 0.095703125, "learning_rate": 0.000465, "loss": 1.5287, "step": 155 }, { "epoch": 0.013679916802429063, "grad_norm": 0.1044921875, "learning_rate": 0.000468, "loss": 1.545, "step": 156 }, { "epoch": 0.013767608576803607, "grad_norm": 0.1298828125, "learning_rate": 0.000471, "loss": 1.615, "step": 157 }, { "epoch": 0.013855300351178153, "grad_norm": 0.134765625, "learning_rate": 0.00047400000000000003, "loss": 1.5476, "step": 158 }, { "epoch": 0.013942992125552698, "grad_norm": 0.10546875, "learning_rate": 0.000477, "loss": 1.5439, "step": 159 }, { "epoch": 0.014030683899927242, "grad_norm": 0.189453125, "learning_rate": 0.00048, "loss": 1.5647, "step": 160 }, { "epoch": 0.014118375674301789, "grad_norm": 0.11669921875, "learning_rate": 0.00048300000000000003, "loss": 1.6254, "step": 161 }, { "epoch": 0.014206067448676333, "grad_norm": 0.10791015625, "learning_rate": 0.00048600000000000005, "loss": 1.4983, "step": 162 }, { "epoch": 0.01429375922305088, "grad_norm": 0.11474609375, "learning_rate": 0.0004890000000000001, "loss": 1.5548, "step": 163 }, { "epoch": 0.014381450997425424, "grad_norm": 0.11376953125, "learning_rate": 0.000492, "loss": 1.4867, "step": 164 }, { "epoch": 0.01446914277179997, "grad_norm": 0.1162109375, "learning_rate": 0.000495, "loss": 1.6054, "step": 165 }, { "epoch": 0.014556834546174515, "grad_norm": 0.09716796875, "learning_rate": 0.0004980000000000001, "loss": 1.553, "step": 166 }, { "epoch": 0.014644526320549061, "grad_norm": 0.162109375, "learning_rate": 0.000501, "loss": 1.568, "step": 167 }, { "epoch": 0.014732218094923605, "grad_norm": 0.11328125, "learning_rate": 0.000504, "loss": 1.5792, "step": 168 }, { "epoch": 0.01481990986929815, "grad_norm": 0.1044921875, "learning_rate": 0.0005070000000000001, "loss": 1.5449, "step": 169 }, { "epoch": 0.014907601643672696, "grad_norm": 0.1103515625, "learning_rate": 0.00051, "loss": 1.5593, "step": 170 }, { "epoch": 0.01499529341804724, "grad_norm": 0.1064453125, "learning_rate": 0.000513, "loss": 1.4883, "step": 171 }, { "epoch": 0.015082985192421787, "grad_norm": 0.1064453125, "learning_rate": 0.000516, "loss": 1.529, "step": 172 }, { "epoch": 0.015170676966796331, "grad_norm": 0.10302734375, "learning_rate": 0.0005189999999999999, "loss": 1.466, "step": 173 }, { "epoch": 0.015258368741170878, "grad_norm": 0.09033203125, "learning_rate": 0.000522, "loss": 1.4907, "step": 174 }, { "epoch": 0.015346060515545422, "grad_norm": 0.16796875, "learning_rate": 0.000525, "loss": 1.5739, "step": 175 }, { "epoch": 0.015433752289919967, "grad_norm": 0.115234375, "learning_rate": 0.0005279999999999999, "loss": 1.5142, "step": 176 }, { "epoch": 0.015521444064294513, "grad_norm": 0.1904296875, "learning_rate": 0.000531, "loss": 1.4573, "step": 177 }, { "epoch": 0.015609135838669057, "grad_norm": 0.1142578125, "learning_rate": 0.000534, "loss": 1.5181, "step": 178 }, { "epoch": 0.015696827613043602, "grad_norm": 0.1572265625, "learning_rate": 0.000537, "loss": 1.4548, "step": 179 }, { "epoch": 0.01578451938741815, "grad_norm": 0.10498046875, "learning_rate": 0.00054, "loss": 1.4154, "step": 180 }, { "epoch": 0.015872211161792694, "grad_norm": 0.11376953125, "learning_rate": 0.000543, "loss": 1.4346, "step": 181 }, { "epoch": 0.01595990293616724, "grad_norm": 0.103515625, "learning_rate": 0.000546, "loss": 1.6055, "step": 182 }, { "epoch": 0.016047594710541783, "grad_norm": 0.11376953125, "learning_rate": 0.000549, "loss": 1.4598, "step": 183 }, { "epoch": 0.016135286484916328, "grad_norm": 0.11572265625, "learning_rate": 0.000552, "loss": 1.4608, "step": 184 }, { "epoch": 0.016222978259290876, "grad_norm": 0.099609375, "learning_rate": 0.000555, "loss": 1.5092, "step": 185 }, { "epoch": 0.01631067003366542, "grad_norm": 0.099609375, "learning_rate": 0.000558, "loss": 1.5338, "step": 186 }, { "epoch": 0.016398361808039965, "grad_norm": 0.1708984375, "learning_rate": 0.000561, "loss": 1.4916, "step": 187 }, { "epoch": 0.01648605358241451, "grad_norm": 0.11376953125, "learning_rate": 0.000564, "loss": 1.4815, "step": 188 }, { "epoch": 0.016573745356789057, "grad_norm": 0.2470703125, "learning_rate": 0.000567, "loss": 1.4502, "step": 189 }, { "epoch": 0.016661437131163602, "grad_norm": 0.10302734375, "learning_rate": 0.00057, "loss": 1.5195, "step": 190 }, { "epoch": 0.016749128905538146, "grad_norm": 0.15234375, "learning_rate": 0.000573, "loss": 1.545, "step": 191 }, { "epoch": 0.01683682067991269, "grad_norm": 0.146484375, "learning_rate": 0.000576, "loss": 1.5517, "step": 192 }, { "epoch": 0.016924512454287235, "grad_norm": 0.10791015625, "learning_rate": 0.000579, "loss": 1.5469, "step": 193 }, { "epoch": 0.017012204228661783, "grad_norm": 0.1416015625, "learning_rate": 0.000582, "loss": 1.4311, "step": 194 }, { "epoch": 0.017099896003036328, "grad_norm": 0.10888671875, "learning_rate": 0.000585, "loss": 1.5292, "step": 195 }, { "epoch": 0.017187587777410872, "grad_norm": 0.1240234375, "learning_rate": 0.000588, "loss": 1.4326, "step": 196 }, { "epoch": 0.017275279551785417, "grad_norm": 0.10205078125, "learning_rate": 0.000591, "loss": 1.4556, "step": 197 }, { "epoch": 0.017362971326159965, "grad_norm": 0.09521484375, "learning_rate": 0.000594, "loss": 1.4719, "step": 198 }, { "epoch": 0.01745066310053451, "grad_norm": 0.11572265625, "learning_rate": 0.0005970000000000001, "loss": 1.5113, "step": 199 }, { "epoch": 0.017538354874909054, "grad_norm": 0.10498046875, "learning_rate": 0.0006000000000000001, "loss": 1.4525, "step": 200 }, { "epoch": 0.0176260466492836, "grad_norm": 0.0888671875, "learning_rate": 0.000603, "loss": 1.4616, "step": 201 }, { "epoch": 0.017713738423658143, "grad_norm": 0.1474609375, "learning_rate": 0.0006060000000000001, "loss": 1.4662, "step": 202 }, { "epoch": 0.01780143019803269, "grad_norm": 0.1181640625, "learning_rate": 0.0006090000000000001, "loss": 1.4347, "step": 203 }, { "epoch": 0.017889121972407235, "grad_norm": 0.09912109375, "learning_rate": 0.000612, "loss": 1.5131, "step": 204 }, { "epoch": 0.01797681374678178, "grad_norm": 0.1220703125, "learning_rate": 0.000615, "loss": 1.4848, "step": 205 }, { "epoch": 0.018064505521156324, "grad_norm": 0.10498046875, "learning_rate": 0.000618, "loss": 1.4201, "step": 206 }, { "epoch": 0.018152197295530872, "grad_norm": 0.1103515625, "learning_rate": 0.000621, "loss": 1.4722, "step": 207 }, { "epoch": 0.018239889069905417, "grad_norm": 0.0966796875, "learning_rate": 0.000624, "loss": 1.345, "step": 208 }, { "epoch": 0.01832758084427996, "grad_norm": 0.08740234375, "learning_rate": 0.000627, "loss": 1.3573, "step": 209 }, { "epoch": 0.018415272618654506, "grad_norm": 0.1494140625, "learning_rate": 0.00063, "loss": 1.4277, "step": 210 }, { "epoch": 0.01850296439302905, "grad_norm": 0.0849609375, "learning_rate": 0.000633, "loss": 1.4629, "step": 211 }, { "epoch": 0.0185906561674036, "grad_norm": 0.1142578125, "learning_rate": 0.000636, "loss": 1.4655, "step": 212 }, { "epoch": 0.018678347941778143, "grad_norm": 0.1328125, "learning_rate": 0.000639, "loss": 1.4401, "step": 213 }, { "epoch": 0.018766039716152687, "grad_norm": 0.1025390625, "learning_rate": 0.000642, "loss": 1.4852, "step": 214 }, { "epoch": 0.018853731490527232, "grad_norm": 0.099609375, "learning_rate": 0.000645, "loss": 1.3802, "step": 215 }, { "epoch": 0.01894142326490178, "grad_norm": 0.111328125, "learning_rate": 0.000648, "loss": 1.4288, "step": 216 }, { "epoch": 0.019029115039276324, "grad_norm": 0.126953125, "learning_rate": 0.000651, "loss": 1.467, "step": 217 }, { "epoch": 0.01911680681365087, "grad_norm": 0.11376953125, "learning_rate": 0.000654, "loss": 1.4569, "step": 218 }, { "epoch": 0.019204498588025413, "grad_norm": 0.1064453125, "learning_rate": 0.000657, "loss": 1.4554, "step": 219 }, { "epoch": 0.019292190362399958, "grad_norm": 0.154296875, "learning_rate": 0.00066, "loss": 1.4055, "step": 220 }, { "epoch": 0.019379882136774506, "grad_norm": 0.08837890625, "learning_rate": 0.0006630000000000001, "loss": 1.4994, "step": 221 }, { "epoch": 0.01946757391114905, "grad_norm": 0.11767578125, "learning_rate": 0.000666, "loss": 1.4716, "step": 222 }, { "epoch": 0.019555265685523595, "grad_norm": 0.10498046875, "learning_rate": 0.000669, "loss": 1.3835, "step": 223 }, { "epoch": 0.01964295745989814, "grad_norm": 0.11474609375, "learning_rate": 0.0006720000000000001, "loss": 1.3807, "step": 224 }, { "epoch": 0.019730649234272687, "grad_norm": 0.1123046875, "learning_rate": 0.000675, "loss": 1.4361, "step": 225 }, { "epoch": 0.019818341008647232, "grad_norm": 0.109375, "learning_rate": 0.000678, "loss": 1.4103, "step": 226 }, { "epoch": 0.019906032783021776, "grad_norm": 0.1201171875, "learning_rate": 0.0006810000000000001, "loss": 1.4801, "step": 227 }, { "epoch": 0.01999372455739632, "grad_norm": 0.10107421875, "learning_rate": 0.000684, "loss": 1.4361, "step": 228 }, { "epoch": 0.020081416331770865, "grad_norm": 0.11328125, "learning_rate": 0.000687, "loss": 1.4731, "step": 229 }, { "epoch": 0.020169108106145413, "grad_norm": 0.11962890625, "learning_rate": 0.0006900000000000001, "loss": 1.4505, "step": 230 }, { "epoch": 0.020256799880519958, "grad_norm": 0.1484375, "learning_rate": 0.000693, "loss": 1.3999, "step": 231 }, { "epoch": 0.020344491654894502, "grad_norm": 0.0859375, "learning_rate": 0.000696, "loss": 1.4207, "step": 232 }, { "epoch": 0.020432183429269047, "grad_norm": 0.1025390625, "learning_rate": 0.0006990000000000001, "loss": 1.4174, "step": 233 }, { "epoch": 0.02051987520364359, "grad_norm": 0.1279296875, "learning_rate": 0.000702, "loss": 1.4266, "step": 234 }, { "epoch": 0.02060756697801814, "grad_norm": 0.10595703125, "learning_rate": 0.000705, "loss": 1.3834, "step": 235 }, { "epoch": 0.020695258752392684, "grad_norm": 0.09326171875, "learning_rate": 0.000708, "loss": 1.3796, "step": 236 }, { "epoch": 0.02078295052676723, "grad_norm": 0.1455078125, "learning_rate": 0.0007109999999999999, "loss": 1.4036, "step": 237 }, { "epoch": 0.020870642301141773, "grad_norm": 0.1220703125, "learning_rate": 0.000714, "loss": 1.3561, "step": 238 }, { "epoch": 0.02095833407551632, "grad_norm": 0.0849609375, "learning_rate": 0.000717, "loss": 1.4045, "step": 239 }, { "epoch": 0.021046025849890865, "grad_norm": 0.1435546875, "learning_rate": 0.0007199999999999999, "loss": 1.41, "step": 240 }, { "epoch": 0.02113371762426541, "grad_norm": 0.1396484375, "learning_rate": 0.000723, "loss": 1.4192, "step": 241 }, { "epoch": 0.021221409398639954, "grad_norm": 0.11328125, "learning_rate": 0.000726, "loss": 1.3841, "step": 242 }, { "epoch": 0.0213091011730145, "grad_norm": 0.1240234375, "learning_rate": 0.000729, "loss": 1.3593, "step": 243 }, { "epoch": 0.021396792947389047, "grad_norm": 0.1181640625, "learning_rate": 0.000732, "loss": 1.3946, "step": 244 }, { "epoch": 0.02148448472176359, "grad_norm": 0.10400390625, "learning_rate": 0.000735, "loss": 1.4397, "step": 245 }, { "epoch": 0.021572176496138136, "grad_norm": 0.10595703125, "learning_rate": 0.000738, "loss": 1.3494, "step": 246 }, { "epoch": 0.02165986827051268, "grad_norm": 0.1396484375, "learning_rate": 0.000741, "loss": 1.4703, "step": 247 }, { "epoch": 0.02174756004488723, "grad_norm": 0.11181640625, "learning_rate": 0.000744, "loss": 1.3589, "step": 248 }, { "epoch": 0.021835251819261773, "grad_norm": 0.1220703125, "learning_rate": 0.000747, "loss": 1.3954, "step": 249 }, { "epoch": 0.021922943593636317, "grad_norm": 0.1044921875, "learning_rate": 0.00075, "loss": 1.3917, "step": 250 }, { "epoch": 0.022010635368010862, "grad_norm": 0.11376953125, "learning_rate": 0.000753, "loss": 1.3834, "step": 251 }, { "epoch": 0.022098327142385406, "grad_norm": 0.0986328125, "learning_rate": 0.000756, "loss": 1.4159, "step": 252 }, { "epoch": 0.022186018916759954, "grad_norm": 0.103515625, "learning_rate": 0.000759, "loss": 1.4357, "step": 253 }, { "epoch": 0.0222737106911345, "grad_norm": 0.10888671875, "learning_rate": 0.000762, "loss": 1.4194, "step": 254 }, { "epoch": 0.022361402465509043, "grad_norm": 0.09375, "learning_rate": 0.0007650000000000001, "loss": 1.3669, "step": 255 }, { "epoch": 0.022449094239883588, "grad_norm": 0.095703125, "learning_rate": 0.000768, "loss": 1.4522, "step": 256 }, { "epoch": 0.022536786014258136, "grad_norm": 0.1103515625, "learning_rate": 0.000771, "loss": 1.3672, "step": 257 }, { "epoch": 0.02262447778863268, "grad_norm": 0.08984375, "learning_rate": 0.0007740000000000001, "loss": 1.3274, "step": 258 }, { "epoch": 0.022712169563007225, "grad_norm": 0.1298828125, "learning_rate": 0.000777, "loss": 1.3584, "step": 259 }, { "epoch": 0.02279986133738177, "grad_norm": 0.12158203125, "learning_rate": 0.0007800000000000001, "loss": 1.3933, "step": 260 }, { "epoch": 0.022887553111756314, "grad_norm": 0.10888671875, "learning_rate": 0.0007830000000000001, "loss": 1.4353, "step": 261 }, { "epoch": 0.022975244886130862, "grad_norm": 0.10205078125, "learning_rate": 0.000786, "loss": 1.3784, "step": 262 }, { "epoch": 0.023062936660505406, "grad_norm": 0.1708984375, "learning_rate": 0.0007890000000000001, "loss": 1.4389, "step": 263 }, { "epoch": 0.02315062843487995, "grad_norm": 0.10791015625, "learning_rate": 0.0007920000000000001, "loss": 1.3353, "step": 264 }, { "epoch": 0.023238320209254495, "grad_norm": 0.208984375, "learning_rate": 0.000795, "loss": 1.375, "step": 265 }, { "epoch": 0.023326011983629043, "grad_norm": 0.0849609375, "learning_rate": 0.0007980000000000001, "loss": 1.3568, "step": 266 }, { "epoch": 0.023413703758003588, "grad_norm": 0.15625, "learning_rate": 0.0008010000000000001, "loss": 1.3975, "step": 267 }, { "epoch": 0.023501395532378132, "grad_norm": 0.09716796875, "learning_rate": 0.000804, "loss": 1.4104, "step": 268 }, { "epoch": 0.023589087306752677, "grad_norm": 0.1201171875, "learning_rate": 0.0008070000000000001, "loss": 1.4314, "step": 269 }, { "epoch": 0.02367677908112722, "grad_norm": 0.08935546875, "learning_rate": 0.0008100000000000001, "loss": 1.3977, "step": 270 }, { "epoch": 0.02376447085550177, "grad_norm": 0.087890625, "learning_rate": 0.000813, "loss": 1.4509, "step": 271 }, { "epoch": 0.023852162629876314, "grad_norm": 0.154296875, "learning_rate": 0.0008160000000000001, "loss": 1.4231, "step": 272 }, { "epoch": 0.02393985440425086, "grad_norm": 0.099609375, "learning_rate": 0.0008190000000000001, "loss": 1.381, "step": 273 }, { "epoch": 0.024027546178625403, "grad_norm": 0.224609375, "learning_rate": 0.000822, "loss": 1.423, "step": 274 }, { "epoch": 0.02411523795299995, "grad_norm": 0.16015625, "learning_rate": 0.0008250000000000001, "loss": 1.4066, "step": 275 }, { "epoch": 0.024202929727374495, "grad_norm": 0.189453125, "learning_rate": 0.0008280000000000001, "loss": 1.374, "step": 276 }, { "epoch": 0.02429062150174904, "grad_norm": 0.341796875, "learning_rate": 0.0008310000000000001, "loss": 1.3806, "step": 277 }, { "epoch": 0.024378313276123584, "grad_norm": 0.1298828125, "learning_rate": 0.0008340000000000001, "loss": 1.4127, "step": 278 }, { "epoch": 0.02446600505049813, "grad_norm": 0.220703125, "learning_rate": 0.0008370000000000001, "loss": 1.3966, "step": 279 }, { "epoch": 0.024553696824872677, "grad_norm": 0.2138671875, "learning_rate": 0.0008400000000000001, "loss": 1.401, "step": 280 }, { "epoch": 0.02464138859924722, "grad_norm": 0.1513671875, "learning_rate": 0.0008430000000000001, "loss": 1.4582, "step": 281 }, { "epoch": 0.024729080373621766, "grad_norm": 0.138671875, "learning_rate": 0.000846, "loss": 1.3452, "step": 282 }, { "epoch": 0.02481677214799631, "grad_norm": 0.09814453125, "learning_rate": 0.0008489999999999999, "loss": 1.3579, "step": 283 }, { "epoch": 0.02490446392237086, "grad_norm": 0.162109375, "learning_rate": 0.0008519999999999999, "loss": 1.3803, "step": 284 }, { "epoch": 0.024992155696745403, "grad_norm": 0.1279296875, "learning_rate": 0.000855, "loss": 1.3182, "step": 285 }, { "epoch": 0.025079847471119947, "grad_norm": 0.1884765625, "learning_rate": 0.0008579999999999999, "loss": 1.4524, "step": 286 }, { "epoch": 0.025167539245494492, "grad_norm": 0.126953125, "learning_rate": 0.000861, "loss": 1.3664, "step": 287 }, { "epoch": 0.025255231019869036, "grad_norm": 0.1640625, "learning_rate": 0.000864, "loss": 1.4199, "step": 288 }, { "epoch": 0.025342922794243584, "grad_norm": 0.1591796875, "learning_rate": 0.0008669999999999999, "loss": 1.3351, "step": 289 }, { "epoch": 0.02543061456861813, "grad_norm": 0.111328125, "learning_rate": 0.00087, "loss": 1.3552, "step": 290 }, { "epoch": 0.025518306342992673, "grad_norm": 0.193359375, "learning_rate": 0.000873, "loss": 1.3583, "step": 291 }, { "epoch": 0.025605998117367218, "grad_norm": 0.10205078125, "learning_rate": 0.0008759999999999999, "loss": 1.3731, "step": 292 }, { "epoch": 0.025693689891741762, "grad_norm": 0.181640625, "learning_rate": 0.000879, "loss": 1.3517, "step": 293 }, { "epoch": 0.02578138166611631, "grad_norm": 0.09521484375, "learning_rate": 0.000882, "loss": 1.4209, "step": 294 }, { "epoch": 0.025869073440490855, "grad_norm": 0.1357421875, "learning_rate": 0.0008849999999999999, "loss": 1.3834, "step": 295 }, { "epoch": 0.0259567652148654, "grad_norm": 0.12353515625, "learning_rate": 0.000888, "loss": 1.4558, "step": 296 }, { "epoch": 0.026044456989239944, "grad_norm": 0.10791015625, "learning_rate": 0.000891, "loss": 1.4401, "step": 297 }, { "epoch": 0.026132148763614492, "grad_norm": 0.1630859375, "learning_rate": 0.0008939999999999999, "loss": 1.3981, "step": 298 }, { "epoch": 0.026219840537989036, "grad_norm": 0.08740234375, "learning_rate": 0.000897, "loss": 1.3309, "step": 299 }, { "epoch": 0.02630753231236358, "grad_norm": 0.240234375, "learning_rate": 0.0009, "loss": 1.34, "step": 300 }, { "epoch": 0.026395224086738125, "grad_norm": 0.083984375, "learning_rate": 0.0009029999999999999, "loss": 1.3813, "step": 301 }, { "epoch": 0.02648291586111267, "grad_norm": 0.2041015625, "learning_rate": 0.000906, "loss": 1.4285, "step": 302 }, { "epoch": 0.026570607635487218, "grad_norm": 0.11572265625, "learning_rate": 0.000909, "loss": 1.3756, "step": 303 }, { "epoch": 0.026658299409861762, "grad_norm": 0.119140625, "learning_rate": 0.000912, "loss": 1.394, "step": 304 }, { "epoch": 0.026745991184236307, "grad_norm": 0.1552734375, "learning_rate": 0.000915, "loss": 1.4289, "step": 305 }, { "epoch": 0.02683368295861085, "grad_norm": 0.09228515625, "learning_rate": 0.000918, "loss": 1.3585, "step": 306 }, { "epoch": 0.0269213747329854, "grad_norm": 0.193359375, "learning_rate": 0.000921, "loss": 1.3685, "step": 307 }, { "epoch": 0.027009066507359944, "grad_norm": 0.08984375, "learning_rate": 0.000924, "loss": 1.3597, "step": 308 }, { "epoch": 0.02709675828173449, "grad_norm": 0.12451171875, "learning_rate": 0.000927, "loss": 1.3464, "step": 309 }, { "epoch": 0.027184450056109033, "grad_norm": 0.11865234375, "learning_rate": 0.00093, "loss": 1.4275, "step": 310 }, { "epoch": 0.027272141830483577, "grad_norm": 0.10693359375, "learning_rate": 0.000933, "loss": 1.3538, "step": 311 }, { "epoch": 0.027359833604858125, "grad_norm": 0.08740234375, "learning_rate": 0.000936, "loss": 1.3189, "step": 312 }, { "epoch": 0.02744752537923267, "grad_norm": 0.0908203125, "learning_rate": 0.0009390000000000001, "loss": 1.337, "step": 313 }, { "epoch": 0.027535217153607214, "grad_norm": 0.099609375, "learning_rate": 0.000942, "loss": 1.3509, "step": 314 }, { "epoch": 0.02762290892798176, "grad_norm": 0.10302734375, "learning_rate": 0.000945, "loss": 1.4155, "step": 315 }, { "epoch": 0.027710600702356307, "grad_norm": 0.12060546875, "learning_rate": 0.0009480000000000001, "loss": 1.3987, "step": 316 }, { "epoch": 0.02779829247673085, "grad_norm": 0.09765625, "learning_rate": 0.000951, "loss": 1.3961, "step": 317 }, { "epoch": 0.027885984251105396, "grad_norm": 0.10888671875, "learning_rate": 0.000954, "loss": 1.3766, "step": 318 }, { "epoch": 0.02797367602547994, "grad_norm": 0.1171875, "learning_rate": 0.0009570000000000001, "loss": 1.3738, "step": 319 }, { "epoch": 0.028061367799854485, "grad_norm": 0.18359375, "learning_rate": 0.00096, "loss": 1.4111, "step": 320 }, { "epoch": 0.028149059574229033, "grad_norm": 0.111328125, "learning_rate": 0.000963, "loss": 1.344, "step": 321 }, { "epoch": 0.028236751348603577, "grad_norm": 0.2333984375, "learning_rate": 0.0009660000000000001, "loss": 1.3448, "step": 322 }, { "epoch": 0.028324443122978122, "grad_norm": 0.09033203125, "learning_rate": 0.000969, "loss": 1.4107, "step": 323 }, { "epoch": 0.028412134897352666, "grad_norm": 0.1708984375, "learning_rate": 0.0009720000000000001, "loss": 1.4008, "step": 324 }, { "epoch": 0.028499826671727214, "grad_norm": 0.09228515625, "learning_rate": 0.0009750000000000001, "loss": 1.3442, "step": 325 }, { "epoch": 0.02858751844610176, "grad_norm": 0.0966796875, "learning_rate": 0.0009780000000000001, "loss": 1.4229, "step": 326 }, { "epoch": 0.028675210220476303, "grad_norm": 0.1123046875, "learning_rate": 0.000981, "loss": 1.3201, "step": 327 }, { "epoch": 0.028762901994850848, "grad_norm": 0.10595703125, "learning_rate": 0.000984, "loss": 1.4149, "step": 328 }, { "epoch": 0.028850593769225392, "grad_norm": 0.162109375, "learning_rate": 0.000987, "loss": 1.3379, "step": 329 }, { "epoch": 0.02893828554359994, "grad_norm": 0.126953125, "learning_rate": 0.00099, "loss": 1.3845, "step": 330 }, { "epoch": 0.029025977317974485, "grad_norm": 0.126953125, "learning_rate": 0.0009930000000000002, "loss": 1.3366, "step": 331 }, { "epoch": 0.02911366909234903, "grad_norm": 0.130859375, "learning_rate": 0.0009960000000000001, "loss": 1.4185, "step": 332 }, { "epoch": 0.029201360866723574, "grad_norm": 0.126953125, "learning_rate": 0.000999, "loss": 1.3812, "step": 333 }, { "epoch": 0.029289052641098122, "grad_norm": 0.10400390625, "learning_rate": 0.001002, "loss": 1.3699, "step": 334 }, { "epoch": 0.029376744415472666, "grad_norm": 0.1171875, "learning_rate": 0.001005, "loss": 1.3294, "step": 335 }, { "epoch": 0.02946443618984721, "grad_norm": 0.1552734375, "learning_rate": 0.001008, "loss": 1.3399, "step": 336 }, { "epoch": 0.029552127964221755, "grad_norm": 0.09765625, "learning_rate": 0.0010110000000000002, "loss": 1.3641, "step": 337 }, { "epoch": 0.0296398197385963, "grad_norm": 0.1171875, "learning_rate": 0.0010140000000000001, "loss": 1.3034, "step": 338 }, { "epoch": 0.029727511512970848, "grad_norm": 0.09375, "learning_rate": 0.0010170000000000001, "loss": 1.4665, "step": 339 }, { "epoch": 0.029815203287345392, "grad_norm": 0.095703125, "learning_rate": 0.00102, "loss": 1.362, "step": 340 }, { "epoch": 0.029902895061719937, "grad_norm": 0.1328125, "learning_rate": 0.001023, "loss": 1.4033, "step": 341 }, { "epoch": 0.02999058683609448, "grad_norm": 0.091796875, "learning_rate": 0.001026, "loss": 1.3324, "step": 342 }, { "epoch": 0.030078278610469026, "grad_norm": 0.130859375, "learning_rate": 0.0010290000000000002, "loss": 1.3564, "step": 343 }, { "epoch": 0.030165970384843574, "grad_norm": 0.1044921875, "learning_rate": 0.001032, "loss": 1.4015, "step": 344 }, { "epoch": 0.03025366215921812, "grad_norm": 0.1044921875, "learning_rate": 0.001035, "loss": 1.4183, "step": 345 }, { "epoch": 0.030341353933592663, "grad_norm": 0.09716796875, "learning_rate": 0.0010379999999999999, "loss": 1.402, "step": 346 }, { "epoch": 0.030429045707967207, "grad_norm": 0.1279296875, "learning_rate": 0.001041, "loss": 1.4356, "step": 347 }, { "epoch": 0.030516737482341755, "grad_norm": 0.10888671875, "learning_rate": 0.001044, "loss": 1.3444, "step": 348 }, { "epoch": 0.0306044292567163, "grad_norm": 0.1005859375, "learning_rate": 0.001047, "loss": 1.2408, "step": 349 }, { "epoch": 0.030692121031090844, "grad_norm": 0.1181640625, "learning_rate": 0.00105, "loss": 1.3412, "step": 350 }, { "epoch": 0.03077981280546539, "grad_norm": 0.107421875, "learning_rate": 0.001053, "loss": 1.3695, "step": 351 }, { "epoch": 0.030867504579839933, "grad_norm": 0.10498046875, "learning_rate": 0.0010559999999999999, "loss": 1.2612, "step": 352 }, { "epoch": 0.03095519635421448, "grad_norm": 0.126953125, "learning_rate": 0.001059, "loss": 1.3209, "step": 353 }, { "epoch": 0.031042888128589026, "grad_norm": 0.11181640625, "learning_rate": 0.001062, "loss": 1.3619, "step": 354 }, { "epoch": 0.03113057990296357, "grad_norm": 0.11669921875, "learning_rate": 0.001065, "loss": 1.3898, "step": 355 }, { "epoch": 0.031218271677338115, "grad_norm": 0.1083984375, "learning_rate": 0.001068, "loss": 1.3756, "step": 356 }, { "epoch": 0.03130596345171266, "grad_norm": 0.09814453125, "learning_rate": 0.001071, "loss": 1.3883, "step": 357 }, { "epoch": 0.031393655226087204, "grad_norm": 0.1015625, "learning_rate": 0.001074, "loss": 1.3457, "step": 358 }, { "epoch": 0.031481347000461755, "grad_norm": 0.1318359375, "learning_rate": 0.001077, "loss": 1.3499, "step": 359 }, { "epoch": 0.0315690387748363, "grad_norm": 0.125, "learning_rate": 0.00108, "loss": 1.385, "step": 360 }, { "epoch": 0.031656730549210844, "grad_norm": 0.0966796875, "learning_rate": 0.001083, "loss": 1.3921, "step": 361 }, { "epoch": 0.03174442232358539, "grad_norm": 0.10888671875, "learning_rate": 0.001086, "loss": 1.3615, "step": 362 }, { "epoch": 0.03183211409795993, "grad_norm": 0.1376953125, "learning_rate": 0.001089, "loss": 1.3125, "step": 363 }, { "epoch": 0.03191980587233448, "grad_norm": 0.2080078125, "learning_rate": 0.001092, "loss": 1.3186, "step": 364 }, { "epoch": 0.03200749764670902, "grad_norm": 0.09375, "learning_rate": 0.001095, "loss": 1.364, "step": 365 }, { "epoch": 0.03209518942108357, "grad_norm": 0.205078125, "learning_rate": 0.001098, "loss": 1.4273, "step": 366 }, { "epoch": 0.03218288119545811, "grad_norm": 0.09375, "learning_rate": 0.001101, "loss": 1.3585, "step": 367 }, { "epoch": 0.032270572969832656, "grad_norm": 0.111328125, "learning_rate": 0.001104, "loss": 1.3809, "step": 368 }, { "epoch": 0.03235826474420721, "grad_norm": 0.11572265625, "learning_rate": 0.001107, "loss": 1.4134, "step": 369 }, { "epoch": 0.03244595651858175, "grad_norm": 0.154296875, "learning_rate": 0.00111, "loss": 1.3046, "step": 370 }, { "epoch": 0.032533648292956296, "grad_norm": 0.1455078125, "learning_rate": 0.001113, "loss": 1.44, "step": 371 }, { "epoch": 0.03262134006733084, "grad_norm": 0.10400390625, "learning_rate": 0.001116, "loss": 1.3767, "step": 372 }, { "epoch": 0.032709031841705385, "grad_norm": 0.10400390625, "learning_rate": 0.001119, "loss": 1.3568, "step": 373 }, { "epoch": 0.03279672361607993, "grad_norm": 0.10302734375, "learning_rate": 0.001122, "loss": 1.2876, "step": 374 }, { "epoch": 0.032884415390454474, "grad_norm": 0.09326171875, "learning_rate": 0.0011250000000000001, "loss": 1.3378, "step": 375 }, { "epoch": 0.03297210716482902, "grad_norm": 0.12255859375, "learning_rate": 0.001128, "loss": 1.2907, "step": 376 }, { "epoch": 0.033059798939203563, "grad_norm": 0.091796875, "learning_rate": 0.001131, "loss": 1.3901, "step": 377 }, { "epoch": 0.033147490713578115, "grad_norm": 0.12158203125, "learning_rate": 0.001134, "loss": 1.3821, "step": 378 }, { "epoch": 0.03323518248795266, "grad_norm": 0.083984375, "learning_rate": 0.001137, "loss": 1.3682, "step": 379 }, { "epoch": 0.033322874262327204, "grad_norm": 0.1123046875, "learning_rate": 0.00114, "loss": 1.3609, "step": 380 }, { "epoch": 0.03341056603670175, "grad_norm": 0.0966796875, "learning_rate": 0.0011430000000000001, "loss": 1.3734, "step": 381 }, { "epoch": 0.03349825781107629, "grad_norm": 0.1591796875, "learning_rate": 0.001146, "loss": 1.426, "step": 382 }, { "epoch": 0.03358594958545084, "grad_norm": 0.1376953125, "learning_rate": 0.001149, "loss": 1.3942, "step": 383 }, { "epoch": 0.03367364135982538, "grad_norm": 0.0908203125, "learning_rate": 0.001152, "loss": 1.3952, "step": 384 }, { "epoch": 0.033761333134199926, "grad_norm": 0.09033203125, "learning_rate": 0.001155, "loss": 1.4302, "step": 385 }, { "epoch": 0.03384902490857447, "grad_norm": 0.10009765625, "learning_rate": 0.001158, "loss": 1.3961, "step": 386 }, { "epoch": 0.03393671668294902, "grad_norm": 0.10888671875, "learning_rate": 0.0011610000000000001, "loss": 1.3277, "step": 387 }, { "epoch": 0.03402440845732357, "grad_norm": 0.09765625, "learning_rate": 0.001164, "loss": 1.2633, "step": 388 }, { "epoch": 0.03411210023169811, "grad_norm": 0.1220703125, "learning_rate": 0.001167, "loss": 1.3971, "step": 389 }, { "epoch": 0.034199792006072656, "grad_norm": 0.09619140625, "learning_rate": 0.00117, "loss": 1.3659, "step": 390 }, { "epoch": 0.0342874837804472, "grad_norm": 0.1640625, "learning_rate": 0.001173, "loss": 1.4147, "step": 391 }, { "epoch": 0.034375175554821745, "grad_norm": 0.10595703125, "learning_rate": 0.001176, "loss": 1.3504, "step": 392 }, { "epoch": 0.03446286732919629, "grad_norm": 0.15625, "learning_rate": 0.0011790000000000001, "loss": 1.3842, "step": 393 }, { "epoch": 0.034550559103570834, "grad_norm": 0.11279296875, "learning_rate": 0.001182, "loss": 1.4277, "step": 394 }, { "epoch": 0.03463825087794538, "grad_norm": 0.142578125, "learning_rate": 0.001185, "loss": 1.3052, "step": 395 }, { "epoch": 0.03472594265231993, "grad_norm": 0.140625, "learning_rate": 0.001188, "loss": 1.41, "step": 396 }, { "epoch": 0.034813634426694474, "grad_norm": 0.154296875, "learning_rate": 0.001191, "loss": 1.3605, "step": 397 }, { "epoch": 0.03490132620106902, "grad_norm": 0.1572265625, "learning_rate": 0.0011940000000000002, "loss": 1.3762, "step": 398 }, { "epoch": 0.03498901797544356, "grad_norm": 0.1845703125, "learning_rate": 0.0011970000000000001, "loss": 1.2598, "step": 399 }, { "epoch": 0.03507670974981811, "grad_norm": 0.2236328125, "learning_rate": 0.0012000000000000001, "loss": 1.352, "step": 400 }, { "epoch": 0.03516440152419265, "grad_norm": 0.1259765625, "learning_rate": 0.001203, "loss": 1.3672, "step": 401 }, { "epoch": 0.0352520932985672, "grad_norm": 0.1767578125, "learning_rate": 0.001206, "loss": 1.4493, "step": 402 }, { "epoch": 0.03533978507294174, "grad_norm": 0.2236328125, "learning_rate": 0.001209, "loss": 1.3753, "step": 403 }, { "epoch": 0.035427476847316286, "grad_norm": 0.15234375, "learning_rate": 0.0012120000000000002, "loss": 1.3092, "step": 404 }, { "epoch": 0.03551516862169084, "grad_norm": 0.12890625, "learning_rate": 0.0012150000000000002, "loss": 1.3083, "step": 405 }, { "epoch": 0.03560286039606538, "grad_norm": 0.1025390625, "learning_rate": 0.0012180000000000001, "loss": 1.2666, "step": 406 }, { "epoch": 0.035690552170439926, "grad_norm": 0.18359375, "learning_rate": 0.0012209999999999999, "loss": 1.31, "step": 407 }, { "epoch": 0.03577824394481447, "grad_norm": 0.130859375, "learning_rate": 0.001224, "loss": 1.3723, "step": 408 }, { "epoch": 0.035865935719189015, "grad_norm": 0.10400390625, "learning_rate": 0.001227, "loss": 1.296, "step": 409 }, { "epoch": 0.03595362749356356, "grad_norm": 0.1689453125, "learning_rate": 0.00123, "loss": 1.3209, "step": 410 }, { "epoch": 0.036041319267938104, "grad_norm": 0.0869140625, "learning_rate": 0.001233, "loss": 1.3439, "step": 411 }, { "epoch": 0.03612901104231265, "grad_norm": 0.1640625, "learning_rate": 0.001236, "loss": 1.361, "step": 412 }, { "epoch": 0.036216702816687193, "grad_norm": 0.150390625, "learning_rate": 0.0012389999999999999, "loss": 1.3845, "step": 413 }, { "epoch": 0.036304394591061745, "grad_norm": 0.12255859375, "learning_rate": 0.001242, "loss": 1.3486, "step": 414 }, { "epoch": 0.03639208636543629, "grad_norm": 0.173828125, "learning_rate": 0.001245, "loss": 1.3611, "step": 415 }, { "epoch": 0.036479778139810834, "grad_norm": 0.158203125, "learning_rate": 0.001248, "loss": 1.3968, "step": 416 }, { "epoch": 0.03656746991418538, "grad_norm": 0.1904296875, "learning_rate": 0.001251, "loss": 1.3516, "step": 417 }, { "epoch": 0.03665516168855992, "grad_norm": 0.216796875, "learning_rate": 0.001254, "loss": 1.3711, "step": 418 }, { "epoch": 0.03674285346293447, "grad_norm": 0.1689453125, "learning_rate": 0.0012569999999999999, "loss": 1.3854, "step": 419 }, { "epoch": 0.03683054523730901, "grad_norm": 0.1650390625, "learning_rate": 0.00126, "loss": 1.3558, "step": 420 }, { "epoch": 0.036918237011683556, "grad_norm": 0.1728515625, "learning_rate": 0.001263, "loss": 1.2976, "step": 421 }, { "epoch": 0.0370059287860581, "grad_norm": 0.103515625, "learning_rate": 0.001266, "loss": 1.3582, "step": 422 }, { "epoch": 0.03709362056043265, "grad_norm": 0.1689453125, "learning_rate": 0.001269, "loss": 1.3893, "step": 423 }, { "epoch": 0.0371813123348072, "grad_norm": 0.10986328125, "learning_rate": 0.001272, "loss": 1.2884, "step": 424 }, { "epoch": 0.03726900410918174, "grad_norm": 0.1181640625, "learning_rate": 0.001275, "loss": 1.3434, "step": 425 }, { "epoch": 0.037356695883556286, "grad_norm": 0.1044921875, "learning_rate": 0.001278, "loss": 1.3381, "step": 426 }, { "epoch": 0.03744438765793083, "grad_norm": 0.140625, "learning_rate": 0.001281, "loss": 1.3903, "step": 427 }, { "epoch": 0.037532079432305375, "grad_norm": 0.08935546875, "learning_rate": 0.001284, "loss": 1.3506, "step": 428 }, { "epoch": 0.03761977120667992, "grad_norm": 0.09814453125, "learning_rate": 0.001287, "loss": 1.2621, "step": 429 }, { "epoch": 0.037707462981054464, "grad_norm": 0.09814453125, "learning_rate": 0.00129, "loss": 1.3424, "step": 430 }, { "epoch": 0.03779515475542901, "grad_norm": 0.1162109375, "learning_rate": 0.001293, "loss": 1.4801, "step": 431 }, { "epoch": 0.03788284652980356, "grad_norm": 0.09814453125, "learning_rate": 0.001296, "loss": 1.3269, "step": 432 }, { "epoch": 0.037970538304178104, "grad_norm": 0.138671875, "learning_rate": 0.001299, "loss": 1.3656, "step": 433 }, { "epoch": 0.03805823007855265, "grad_norm": 0.08837890625, "learning_rate": 0.001302, "loss": 1.3883, "step": 434 }, { "epoch": 0.03814592185292719, "grad_norm": 0.1044921875, "learning_rate": 0.001305, "loss": 1.3636, "step": 435 }, { "epoch": 0.03823361362730174, "grad_norm": 0.09130859375, "learning_rate": 0.001308, "loss": 1.3846, "step": 436 }, { "epoch": 0.03832130540167628, "grad_norm": 0.1201171875, "learning_rate": 0.001311, "loss": 1.3506, "step": 437 }, { "epoch": 0.03840899717605083, "grad_norm": 0.0859375, "learning_rate": 0.001314, "loss": 1.3023, "step": 438 }, { "epoch": 0.03849668895042537, "grad_norm": 0.109375, "learning_rate": 0.001317, "loss": 1.3817, "step": 439 }, { "epoch": 0.038584380724799916, "grad_norm": 0.1435546875, "learning_rate": 0.00132, "loss": 1.3687, "step": 440 }, { "epoch": 0.03867207249917447, "grad_norm": 0.13671875, "learning_rate": 0.001323, "loss": 1.3563, "step": 441 }, { "epoch": 0.03875976427354901, "grad_norm": 0.08837890625, "learning_rate": 0.0013260000000000001, "loss": 1.3338, "step": 442 }, { "epoch": 0.038847456047923556, "grad_norm": 0.1103515625, "learning_rate": 0.001329, "loss": 1.3922, "step": 443 }, { "epoch": 0.0389351478222981, "grad_norm": 0.12158203125, "learning_rate": 0.001332, "loss": 1.2568, "step": 444 }, { "epoch": 0.039022839596672645, "grad_norm": 0.1337890625, "learning_rate": 0.001335, "loss": 1.3628, "step": 445 }, { "epoch": 0.03911053137104719, "grad_norm": 0.1064453125, "learning_rate": 0.001338, "loss": 1.3751, "step": 446 }, { "epoch": 0.039198223145421734, "grad_norm": 0.126953125, "learning_rate": 0.001341, "loss": 1.3495, "step": 447 }, { "epoch": 0.03928591491979628, "grad_norm": 0.1474609375, "learning_rate": 0.0013440000000000001, "loss": 1.317, "step": 448 }, { "epoch": 0.039373606694170823, "grad_norm": 0.09765625, "learning_rate": 0.001347, "loss": 1.3535, "step": 449 }, { "epoch": 0.039461298468545375, "grad_norm": 0.12890625, "learning_rate": 0.00135, "loss": 1.2891, "step": 450 }, { "epoch": 0.03954899024291992, "grad_norm": 0.1376953125, "learning_rate": 0.001353, "loss": 1.3959, "step": 451 }, { "epoch": 0.039636682017294464, "grad_norm": 0.154296875, "learning_rate": 0.001356, "loss": 1.3192, "step": 452 }, { "epoch": 0.03972437379166901, "grad_norm": 0.1572265625, "learning_rate": 0.001359, "loss": 1.4324, "step": 453 }, { "epoch": 0.03981206556604355, "grad_norm": 0.0986328125, "learning_rate": 0.0013620000000000001, "loss": 1.3246, "step": 454 }, { "epoch": 0.0398997573404181, "grad_norm": 0.138671875, "learning_rate": 0.0013650000000000001, "loss": 1.2848, "step": 455 }, { "epoch": 0.03998744911479264, "grad_norm": 0.142578125, "learning_rate": 0.001368, "loss": 1.3973, "step": 456 }, { "epoch": 0.040075140889167186, "grad_norm": 0.1572265625, "learning_rate": 0.001371, "loss": 1.3794, "step": 457 }, { "epoch": 0.04016283266354173, "grad_norm": 0.099609375, "learning_rate": 0.001374, "loss": 1.2708, "step": 458 }, { "epoch": 0.04025052443791628, "grad_norm": 0.171875, "learning_rate": 0.0013770000000000002, "loss": 1.3587, "step": 459 }, { "epoch": 0.04033821621229083, "grad_norm": 0.1611328125, "learning_rate": 0.0013800000000000002, "loss": 1.2727, "step": 460 }, { "epoch": 0.04042590798666537, "grad_norm": 0.171875, "learning_rate": 0.0013830000000000001, "loss": 1.3551, "step": 461 }, { "epoch": 0.040513599761039916, "grad_norm": 0.1328125, "learning_rate": 0.001386, "loss": 1.3897, "step": 462 }, { "epoch": 0.04060129153541446, "grad_norm": 0.14453125, "learning_rate": 0.001389, "loss": 1.3319, "step": 463 }, { "epoch": 0.040688983309789005, "grad_norm": 0.16796875, "learning_rate": 0.001392, "loss": 1.2967, "step": 464 }, { "epoch": 0.04077667508416355, "grad_norm": 0.138671875, "learning_rate": 0.0013950000000000002, "loss": 1.3376, "step": 465 }, { "epoch": 0.040864366858538094, "grad_norm": 0.146484375, "learning_rate": 0.0013980000000000002, "loss": 1.3628, "step": 466 }, { "epoch": 0.04095205863291264, "grad_norm": 0.1982421875, "learning_rate": 0.0014010000000000001, "loss": 1.2868, "step": 467 }, { "epoch": 0.04103975040728718, "grad_norm": 0.0947265625, "learning_rate": 0.001404, "loss": 1.3618, "step": 468 }, { "epoch": 0.041127442181661734, "grad_norm": 0.2197265625, "learning_rate": 0.001407, "loss": 1.3419, "step": 469 }, { "epoch": 0.04121513395603628, "grad_norm": 0.09765625, "learning_rate": 0.00141, "loss": 1.3019, "step": 470 }, { "epoch": 0.04130282573041082, "grad_norm": 0.2470703125, "learning_rate": 0.001413, "loss": 1.4217, "step": 471 }, { "epoch": 0.04139051750478537, "grad_norm": 0.0986328125, "learning_rate": 0.001416, "loss": 1.3689, "step": 472 }, { "epoch": 0.04147820927915991, "grad_norm": 0.173828125, "learning_rate": 0.001419, "loss": 1.3036, "step": 473 }, { "epoch": 0.04156590105353446, "grad_norm": 0.0986328125, "learning_rate": 0.0014219999999999999, "loss": 1.3835, "step": 474 }, { "epoch": 0.041653592827909, "grad_norm": 0.1611328125, "learning_rate": 0.001425, "loss": 1.2816, "step": 475 }, { "epoch": 0.041741284602283546, "grad_norm": 0.107421875, "learning_rate": 0.001428, "loss": 1.3424, "step": 476 }, { "epoch": 0.04182897637665809, "grad_norm": 0.1044921875, "learning_rate": 0.001431, "loss": 1.3255, "step": 477 }, { "epoch": 0.04191666815103264, "grad_norm": 0.11376953125, "learning_rate": 0.001434, "loss": 1.358, "step": 478 }, { "epoch": 0.042004359925407186, "grad_norm": 0.103515625, "learning_rate": 0.001437, "loss": 1.3379, "step": 479 }, { "epoch": 0.04209205169978173, "grad_norm": 0.1162109375, "learning_rate": 0.0014399999999999999, "loss": 1.2963, "step": 480 }, { "epoch": 0.042179743474156275, "grad_norm": 0.08984375, "learning_rate": 0.001443, "loss": 1.2964, "step": 481 }, { "epoch": 0.04226743524853082, "grad_norm": 0.08837890625, "learning_rate": 0.001446, "loss": 1.2804, "step": 482 }, { "epoch": 0.042355127022905364, "grad_norm": 0.1181640625, "learning_rate": 0.001449, "loss": 1.353, "step": 483 }, { "epoch": 0.04244281879727991, "grad_norm": 0.140625, "learning_rate": 0.001452, "loss": 1.2805, "step": 484 }, { "epoch": 0.042530510571654453, "grad_norm": 0.130859375, "learning_rate": 0.001455, "loss": 1.3353, "step": 485 }, { "epoch": 0.042618202346029, "grad_norm": 0.1845703125, "learning_rate": 0.001458, "loss": 1.4368, "step": 486 }, { "epoch": 0.04270589412040355, "grad_norm": 0.1416015625, "learning_rate": 0.001461, "loss": 1.355, "step": 487 }, { "epoch": 0.042793585894778094, "grad_norm": 0.1318359375, "learning_rate": 0.001464, "loss": 1.3571, "step": 488 }, { "epoch": 0.04288127766915264, "grad_norm": 0.12890625, "learning_rate": 0.001467, "loss": 1.3144, "step": 489 }, { "epoch": 0.04296896944352718, "grad_norm": 0.09716796875, "learning_rate": 0.00147, "loss": 1.3431, "step": 490 }, { "epoch": 0.04305666121790173, "grad_norm": 0.119140625, "learning_rate": 0.001473, "loss": 1.3331, "step": 491 }, { "epoch": 0.04314435299227627, "grad_norm": 0.10205078125, "learning_rate": 0.001476, "loss": 1.3873, "step": 492 }, { "epoch": 0.043232044766650816, "grad_norm": 0.1181640625, "learning_rate": 0.001479, "loss": 1.3456, "step": 493 }, { "epoch": 0.04331973654102536, "grad_norm": 0.10107421875, "learning_rate": 0.001482, "loss": 1.354, "step": 494 }, { "epoch": 0.043407428315399905, "grad_norm": 0.1513671875, "learning_rate": 0.001485, "loss": 1.3155, "step": 495 }, { "epoch": 0.04349512008977446, "grad_norm": 0.1318359375, "learning_rate": 0.001488, "loss": 1.3318, "step": 496 }, { "epoch": 0.043582811864149, "grad_norm": 0.1318359375, "learning_rate": 0.001491, "loss": 1.3467, "step": 497 }, { "epoch": 0.043670503638523546, "grad_norm": 0.1005859375, "learning_rate": 0.001494, "loss": 1.33, "step": 498 }, { "epoch": 0.04375819541289809, "grad_norm": 0.09765625, "learning_rate": 0.001497, "loss": 1.3274, "step": 499 }, { "epoch": 0.043845887187272635, "grad_norm": 0.138671875, "learning_rate": 0.0015, "loss": 1.3365, "step": 500 }, { "epoch": 0.043845887187272635, "eval_loss": 1.3519667387008667, "eval_runtime": 427.9384, "eval_samples_per_second": 33.76, "eval_steps_per_second": 8.44, "step": 500 }, { "epoch": 0.04393357896164718, "grad_norm": 0.1767578125, "learning_rate": 0.001503, "loss": 1.3827, "step": 501 }, { "epoch": 0.044021270736021724, "grad_norm": 0.08984375, "learning_rate": 0.001506, "loss": 1.3601, "step": 502 }, { "epoch": 0.04410896251039627, "grad_norm": 0.1298828125, "learning_rate": 0.0015090000000000001, "loss": 1.3613, "step": 503 }, { "epoch": 0.04419665428477081, "grad_norm": 0.15234375, "learning_rate": 0.001512, "loss": 1.3476, "step": 504 }, { "epoch": 0.044284346059145364, "grad_norm": 0.09619140625, "learning_rate": 0.001515, "loss": 1.382, "step": 505 }, { "epoch": 0.04437203783351991, "grad_norm": 0.134765625, "learning_rate": 0.001518, "loss": 1.3764, "step": 506 }, { "epoch": 0.04445972960789445, "grad_norm": 0.09130859375, "learning_rate": 0.001521, "loss": 1.265, "step": 507 }, { "epoch": 0.044547421382269, "grad_norm": 0.0908203125, "learning_rate": 0.001524, "loss": 1.3309, "step": 508 }, { "epoch": 0.04463511315664354, "grad_norm": 0.095703125, "learning_rate": 0.0015270000000000001, "loss": 1.3333, "step": 509 }, { "epoch": 0.04472280493101809, "grad_norm": 0.09765625, "learning_rate": 0.0015300000000000001, "loss": 1.3741, "step": 510 }, { "epoch": 0.04481049670539263, "grad_norm": 0.1279296875, "learning_rate": 0.001533, "loss": 1.3363, "step": 511 }, { "epoch": 0.044898188479767176, "grad_norm": 0.12890625, "learning_rate": 0.001536, "loss": 1.3045, "step": 512 }, { "epoch": 0.04498588025414172, "grad_norm": 0.15625, "learning_rate": 0.001539, "loss": 1.4171, "step": 513 }, { "epoch": 0.04507357202851627, "grad_norm": 0.0849609375, "learning_rate": 0.001542, "loss": 1.3343, "step": 514 }, { "epoch": 0.045161263802890816, "grad_norm": 0.1640625, "learning_rate": 0.0015450000000000001, "loss": 1.3433, "step": 515 }, { "epoch": 0.04524895557726536, "grad_norm": 0.09912109375, "learning_rate": 0.0015480000000000001, "loss": 1.281, "step": 516 }, { "epoch": 0.045336647351639905, "grad_norm": 0.1171875, "learning_rate": 0.001551, "loss": 1.3229, "step": 517 }, { "epoch": 0.04542433912601445, "grad_norm": 0.0869140625, "learning_rate": 0.001554, "loss": 1.3284, "step": 518 }, { "epoch": 0.045512030900388994, "grad_norm": 0.11767578125, "learning_rate": 0.001557, "loss": 1.3185, "step": 519 }, { "epoch": 0.04559972267476354, "grad_norm": 0.12255859375, "learning_rate": 0.0015600000000000002, "loss": 1.4157, "step": 520 }, { "epoch": 0.045687414449138083, "grad_norm": 0.1171875, "learning_rate": 0.0015630000000000002, "loss": 1.3225, "step": 521 }, { "epoch": 0.04577510622351263, "grad_norm": 0.1591796875, "learning_rate": 0.0015660000000000001, "loss": 1.2768, "step": 522 }, { "epoch": 0.04586279799788718, "grad_norm": 0.1083984375, "learning_rate": 0.001569, "loss": 1.3378, "step": 523 }, { "epoch": 0.045950489772261724, "grad_norm": 0.2158203125, "learning_rate": 0.001572, "loss": 1.3312, "step": 524 }, { "epoch": 0.04603818154663627, "grad_norm": 0.146484375, "learning_rate": 0.001575, "loss": 1.3473, "step": 525 }, { "epoch": 0.04612587332101081, "grad_norm": 0.1533203125, "learning_rate": 0.0015780000000000002, "loss": 1.3193, "step": 526 }, { "epoch": 0.04621356509538536, "grad_norm": 0.166015625, "learning_rate": 0.0015810000000000002, "loss": 1.3617, "step": 527 }, { "epoch": 0.0463012568697599, "grad_norm": 0.146484375, "learning_rate": 0.0015840000000000001, "loss": 1.4205, "step": 528 }, { "epoch": 0.046388948644134446, "grad_norm": 0.1943359375, "learning_rate": 0.001587, "loss": 1.3412, "step": 529 }, { "epoch": 0.04647664041850899, "grad_norm": 0.138671875, "learning_rate": 0.00159, "loss": 1.3731, "step": 530 }, { "epoch": 0.046564332192883535, "grad_norm": 0.158203125, "learning_rate": 0.001593, "loss": 1.3481, "step": 531 }, { "epoch": 0.04665202396725809, "grad_norm": 0.11865234375, "learning_rate": 0.0015960000000000002, "loss": 1.3438, "step": 532 }, { "epoch": 0.04673971574163263, "grad_norm": 0.11376953125, "learning_rate": 0.0015990000000000002, "loss": 1.3893, "step": 533 }, { "epoch": 0.046827407516007176, "grad_norm": 0.12060546875, "learning_rate": 0.0016020000000000001, "loss": 1.3365, "step": 534 }, { "epoch": 0.04691509929038172, "grad_norm": 0.177734375, "learning_rate": 0.001605, "loss": 1.3429, "step": 535 }, { "epoch": 0.047002791064756265, "grad_norm": 0.109375, "learning_rate": 0.001608, "loss": 1.3629, "step": 536 }, { "epoch": 0.04709048283913081, "grad_norm": 0.177734375, "learning_rate": 0.0016110000000000002, "loss": 1.2756, "step": 537 }, { "epoch": 0.047178174613505354, "grad_norm": 0.1103515625, "learning_rate": 0.0016140000000000002, "loss": 1.3064, "step": 538 }, { "epoch": 0.0472658663878799, "grad_norm": 0.1455078125, "learning_rate": 0.0016170000000000002, "loss": 1.3871, "step": 539 }, { "epoch": 0.04735355816225444, "grad_norm": 0.1220703125, "learning_rate": 0.0016200000000000001, "loss": 1.2368, "step": 540 }, { "epoch": 0.047441249936628994, "grad_norm": 0.1572265625, "learning_rate": 0.001623, "loss": 1.376, "step": 541 }, { "epoch": 0.04752894171100354, "grad_norm": 0.16015625, "learning_rate": 0.001626, "loss": 1.3061, "step": 542 }, { "epoch": 0.04761663348537808, "grad_norm": 0.130859375, "learning_rate": 0.0016290000000000002, "loss": 1.2628, "step": 543 }, { "epoch": 0.04770432525975263, "grad_norm": 0.1337890625, "learning_rate": 0.0016320000000000002, "loss": 1.2792, "step": 544 }, { "epoch": 0.04779201703412717, "grad_norm": 0.10107421875, "learning_rate": 0.0016350000000000002, "loss": 1.285, "step": 545 }, { "epoch": 0.04787970880850172, "grad_norm": 0.193359375, "learning_rate": 0.0016380000000000001, "loss": 1.3308, "step": 546 }, { "epoch": 0.04796740058287626, "grad_norm": 0.10302734375, "learning_rate": 0.001641, "loss": 1.3833, "step": 547 }, { "epoch": 0.048055092357250806, "grad_norm": 0.2080078125, "learning_rate": 0.001644, "loss": 1.3428, "step": 548 }, { "epoch": 0.04814278413162535, "grad_norm": 0.083984375, "learning_rate": 0.0016470000000000002, "loss": 1.3376, "step": 549 }, { "epoch": 0.0482304759059999, "grad_norm": 0.099609375, "learning_rate": 0.0016500000000000002, "loss": 1.4305, "step": 550 }, { "epoch": 0.048318167680374446, "grad_norm": 0.140625, "learning_rate": 0.0016530000000000002, "loss": 1.3828, "step": 551 }, { "epoch": 0.04840585945474899, "grad_norm": 0.09716796875, "learning_rate": 0.0016560000000000001, "loss": 1.323, "step": 552 }, { "epoch": 0.048493551229123535, "grad_norm": 0.12890625, "learning_rate": 0.001659, "loss": 1.3733, "step": 553 }, { "epoch": 0.04858124300349808, "grad_norm": 0.08837890625, "learning_rate": 0.0016620000000000003, "loss": 1.3101, "step": 554 }, { "epoch": 0.048668934777872624, "grad_norm": 0.1064453125, "learning_rate": 0.0016650000000000002, "loss": 1.3806, "step": 555 }, { "epoch": 0.04875662655224717, "grad_norm": 0.119140625, "learning_rate": 0.0016680000000000002, "loss": 1.321, "step": 556 }, { "epoch": 0.04884431832662171, "grad_norm": 0.1416015625, "learning_rate": 0.0016710000000000002, "loss": 1.3756, "step": 557 }, { "epoch": 0.04893201010099626, "grad_norm": 0.1572265625, "learning_rate": 0.0016740000000000001, "loss": 1.3487, "step": 558 }, { "epoch": 0.04901970187537081, "grad_norm": 0.11669921875, "learning_rate": 0.001677, "loss": 1.2843, "step": 559 }, { "epoch": 0.049107393649745354, "grad_norm": 0.1220703125, "learning_rate": 0.0016800000000000003, "loss": 1.3796, "step": 560 }, { "epoch": 0.0491950854241199, "grad_norm": 0.08056640625, "learning_rate": 0.0016830000000000003, "loss": 1.3371, "step": 561 }, { "epoch": 0.04928277719849444, "grad_norm": 0.10107421875, "learning_rate": 0.0016860000000000002, "loss": 1.3571, "step": 562 }, { "epoch": 0.04937046897286899, "grad_norm": 0.140625, "learning_rate": 0.001689, "loss": 1.3039, "step": 563 }, { "epoch": 0.04945816074724353, "grad_norm": 0.0888671875, "learning_rate": 0.001692, "loss": 1.3606, "step": 564 }, { "epoch": 0.049545852521618076, "grad_norm": 0.1259765625, "learning_rate": 0.001695, "loss": 1.348, "step": 565 }, { "epoch": 0.04963354429599262, "grad_norm": 0.10400390625, "learning_rate": 0.0016979999999999999, "loss": 1.291, "step": 566 }, { "epoch": 0.049721236070367165, "grad_norm": 0.142578125, "learning_rate": 0.0017009999999999998, "loss": 1.3581, "step": 567 }, { "epoch": 0.04980892784474172, "grad_norm": 0.0947265625, "learning_rate": 0.0017039999999999998, "loss": 1.311, "step": 568 }, { "epoch": 0.04989661961911626, "grad_norm": 0.1416015625, "learning_rate": 0.001707, "loss": 1.3596, "step": 569 }, { "epoch": 0.049984311393490806, "grad_norm": 0.11376953125, "learning_rate": 0.00171, "loss": 1.3427, "step": 570 }, { "epoch": 0.05007200316786535, "grad_norm": 0.1396484375, "learning_rate": 0.001713, "loss": 1.3852, "step": 571 }, { "epoch": 0.050159694942239895, "grad_norm": 0.1767578125, "learning_rate": 0.0017159999999999999, "loss": 1.3147, "step": 572 }, { "epoch": 0.05024738671661444, "grad_norm": 0.11181640625, "learning_rate": 0.0017189999999999998, "loss": 1.3285, "step": 573 }, { "epoch": 0.050335078490988984, "grad_norm": 0.1865234375, "learning_rate": 0.001722, "loss": 1.3213, "step": 574 }, { "epoch": 0.05042277026536353, "grad_norm": 0.09619140625, "learning_rate": 0.001725, "loss": 1.2918, "step": 575 }, { "epoch": 0.05051046203973807, "grad_norm": 0.09912109375, "learning_rate": 0.001728, "loss": 1.3342, "step": 576 }, { "epoch": 0.05059815381411262, "grad_norm": 0.11865234375, "learning_rate": 0.001731, "loss": 1.3351, "step": 577 }, { "epoch": 0.05068584558848717, "grad_norm": 0.140625, "learning_rate": 0.0017339999999999999, "loss": 1.4707, "step": 578 }, { "epoch": 0.05077353736286171, "grad_norm": 0.125, "learning_rate": 0.0017369999999999998, "loss": 1.3766, "step": 579 }, { "epoch": 0.05086122913723626, "grad_norm": 0.1630859375, "learning_rate": 0.00174, "loss": 1.3022, "step": 580 }, { "epoch": 0.0509489209116108, "grad_norm": 0.109375, "learning_rate": 0.001743, "loss": 1.3365, "step": 581 }, { "epoch": 0.05103661268598535, "grad_norm": 0.1552734375, "learning_rate": 0.001746, "loss": 1.2805, "step": 582 }, { "epoch": 0.05112430446035989, "grad_norm": 0.1396484375, "learning_rate": 0.001749, "loss": 1.3641, "step": 583 }, { "epoch": 0.051211996234734436, "grad_norm": 0.130859375, "learning_rate": 0.0017519999999999999, "loss": 1.3591, "step": 584 }, { "epoch": 0.05129968800910898, "grad_norm": 0.1123046875, "learning_rate": 0.0017549999999999998, "loss": 1.2795, "step": 585 }, { "epoch": 0.051387379783483525, "grad_norm": 0.09814453125, "learning_rate": 0.001758, "loss": 1.4044, "step": 586 }, { "epoch": 0.051475071557858076, "grad_norm": 0.142578125, "learning_rate": 0.001761, "loss": 1.2848, "step": 587 }, { "epoch": 0.05156276333223262, "grad_norm": 0.203125, "learning_rate": 0.001764, "loss": 1.3604, "step": 588 }, { "epoch": 0.051650455106607165, "grad_norm": 0.1396484375, "learning_rate": 0.001767, "loss": 1.265, "step": 589 }, { "epoch": 0.05173814688098171, "grad_norm": 0.1708984375, "learning_rate": 0.0017699999999999999, "loss": 1.3369, "step": 590 }, { "epoch": 0.051825838655356254, "grad_norm": 0.1123046875, "learning_rate": 0.001773, "loss": 1.3202, "step": 591 }, { "epoch": 0.0519135304297308, "grad_norm": 0.1044921875, "learning_rate": 0.001776, "loss": 1.3488, "step": 592 }, { "epoch": 0.05200122220410534, "grad_norm": 0.1435546875, "learning_rate": 0.001779, "loss": 1.3653, "step": 593 }, { "epoch": 0.05208891397847989, "grad_norm": 0.111328125, "learning_rate": 0.001782, "loss": 1.3817, "step": 594 }, { "epoch": 0.05217660575285443, "grad_norm": 0.1259765625, "learning_rate": 0.001785, "loss": 1.3422, "step": 595 }, { "epoch": 0.052264297527228984, "grad_norm": 0.10302734375, "learning_rate": 0.0017879999999999999, "loss": 1.3877, "step": 596 }, { "epoch": 0.05235198930160353, "grad_norm": 0.1318359375, "learning_rate": 0.001791, "loss": 1.3137, "step": 597 }, { "epoch": 0.05243968107597807, "grad_norm": 0.10986328125, "learning_rate": 0.001794, "loss": 1.3798, "step": 598 }, { "epoch": 0.05252737285035262, "grad_norm": 0.11474609375, "learning_rate": 0.001797, "loss": 1.3497, "step": 599 }, { "epoch": 0.05261506462472716, "grad_norm": 0.1142578125, "learning_rate": 0.0018, "loss": 1.3417, "step": 600 }, { "epoch": 0.052702756399101706, "grad_norm": 0.125, "learning_rate": 0.001803, "loss": 1.3298, "step": 601 }, { "epoch": 0.05279044817347625, "grad_norm": 0.10595703125, "learning_rate": 0.0018059999999999999, "loss": 1.3585, "step": 602 }, { "epoch": 0.052878139947850795, "grad_norm": 0.11279296875, "learning_rate": 0.001809, "loss": 1.331, "step": 603 }, { "epoch": 0.05296583172222534, "grad_norm": 0.11376953125, "learning_rate": 0.001812, "loss": 1.2992, "step": 604 }, { "epoch": 0.05305352349659989, "grad_norm": 0.09619140625, "learning_rate": 0.001815, "loss": 1.2915, "step": 605 }, { "epoch": 0.053141215270974436, "grad_norm": 0.09033203125, "learning_rate": 0.001818, "loss": 1.32, "step": 606 }, { "epoch": 0.05322890704534898, "grad_norm": 0.0771484375, "learning_rate": 0.001821, "loss": 1.3601, "step": 607 }, { "epoch": 0.053316598819723525, "grad_norm": 0.0927734375, "learning_rate": 0.001824, "loss": 1.3336, "step": 608 }, { "epoch": 0.05340429059409807, "grad_norm": 0.09765625, "learning_rate": 0.001827, "loss": 1.347, "step": 609 }, { "epoch": 0.053491982368472614, "grad_norm": 0.1650390625, "learning_rate": 0.00183, "loss": 1.3347, "step": 610 }, { "epoch": 0.05357967414284716, "grad_norm": 0.10546875, "learning_rate": 0.001833, "loss": 1.3217, "step": 611 }, { "epoch": 0.0536673659172217, "grad_norm": 0.09716796875, "learning_rate": 0.001836, "loss": 1.2653, "step": 612 }, { "epoch": 0.05375505769159625, "grad_norm": 0.11083984375, "learning_rate": 0.001839, "loss": 1.3131, "step": 613 }, { "epoch": 0.0538427494659708, "grad_norm": 0.166015625, "learning_rate": 0.001842, "loss": 1.3646, "step": 614 }, { "epoch": 0.05393044124034534, "grad_norm": 0.091796875, "learning_rate": 0.001845, "loss": 1.3012, "step": 615 }, { "epoch": 0.05401813301471989, "grad_norm": 0.11181640625, "learning_rate": 0.001848, "loss": 1.2882, "step": 616 }, { "epoch": 0.05410582478909443, "grad_norm": 0.0927734375, "learning_rate": 0.001851, "loss": 1.2802, "step": 617 }, { "epoch": 0.05419351656346898, "grad_norm": 0.107421875, "learning_rate": 0.001854, "loss": 1.3446, "step": 618 }, { "epoch": 0.05428120833784352, "grad_norm": 0.138671875, "learning_rate": 0.001857, "loss": 1.3352, "step": 619 }, { "epoch": 0.054368900112218066, "grad_norm": 0.14453125, "learning_rate": 0.00186, "loss": 1.3795, "step": 620 }, { "epoch": 0.05445659188659261, "grad_norm": 0.142578125, "learning_rate": 0.001863, "loss": 1.3184, "step": 621 }, { "epoch": 0.054544283660967155, "grad_norm": 0.12890625, "learning_rate": 0.001866, "loss": 1.306, "step": 622 }, { "epoch": 0.054631975435341706, "grad_norm": 0.1357421875, "learning_rate": 0.001869, "loss": 1.3334, "step": 623 }, { "epoch": 0.05471966720971625, "grad_norm": 0.09765625, "learning_rate": 0.001872, "loss": 1.3268, "step": 624 }, { "epoch": 0.054807358984090795, "grad_norm": 0.11181640625, "learning_rate": 0.001875, "loss": 1.3805, "step": 625 }, { "epoch": 0.05489505075846534, "grad_norm": 0.083984375, "learning_rate": 0.0018780000000000001, "loss": 1.4231, "step": 626 }, { "epoch": 0.054982742532839884, "grad_norm": 0.1455078125, "learning_rate": 0.001881, "loss": 1.3484, "step": 627 }, { "epoch": 0.05507043430721443, "grad_norm": 0.1064453125, "learning_rate": 0.001884, "loss": 1.3057, "step": 628 }, { "epoch": 0.05515812608158897, "grad_norm": 0.1181640625, "learning_rate": 0.001887, "loss": 1.3342, "step": 629 }, { "epoch": 0.05524581785596352, "grad_norm": 0.1396484375, "learning_rate": 0.00189, "loss": 1.3539, "step": 630 }, { "epoch": 0.05533350963033806, "grad_norm": 0.1279296875, "learning_rate": 0.0018930000000000002, "loss": 1.3593, "step": 631 }, { "epoch": 0.055421201404712614, "grad_norm": 0.1279296875, "learning_rate": 0.0018960000000000001, "loss": 1.3262, "step": 632 }, { "epoch": 0.05550889317908716, "grad_norm": 0.126953125, "learning_rate": 0.001899, "loss": 1.3629, "step": 633 }, { "epoch": 0.0555965849534617, "grad_norm": 0.107421875, "learning_rate": 0.001902, "loss": 1.313, "step": 634 }, { "epoch": 0.05568427672783625, "grad_norm": 0.1376953125, "learning_rate": 0.001905, "loss": 1.3779, "step": 635 }, { "epoch": 0.05577196850221079, "grad_norm": 0.07958984375, "learning_rate": 0.001908, "loss": 1.3722, "step": 636 }, { "epoch": 0.055859660276585336, "grad_norm": 0.1494140625, "learning_rate": 0.0019110000000000002, "loss": 1.2903, "step": 637 }, { "epoch": 0.05594735205095988, "grad_norm": 0.09619140625, "learning_rate": 0.0019140000000000001, "loss": 1.2875, "step": 638 }, { "epoch": 0.056035043825334425, "grad_norm": 0.1337890625, "learning_rate": 0.001917, "loss": 1.3276, "step": 639 }, { "epoch": 0.05612273559970897, "grad_norm": 0.1142578125, "learning_rate": 0.00192, "loss": 1.2509, "step": 640 }, { "epoch": 0.05621042737408352, "grad_norm": 0.083984375, "learning_rate": 0.001923, "loss": 1.3149, "step": 641 }, { "epoch": 0.056298119148458066, "grad_norm": 0.1279296875, "learning_rate": 0.001926, "loss": 1.3123, "step": 642 }, { "epoch": 0.05638581092283261, "grad_norm": 0.09765625, "learning_rate": 0.0019290000000000002, "loss": 1.3152, "step": 643 }, { "epoch": 0.056473502697207155, "grad_norm": 0.1669921875, "learning_rate": 0.0019320000000000001, "loss": 1.337, "step": 644 }, { "epoch": 0.0565611944715817, "grad_norm": 0.11767578125, "learning_rate": 0.001935, "loss": 1.3222, "step": 645 }, { "epoch": 0.056648886245956244, "grad_norm": 0.1484375, "learning_rate": 0.001938, "loss": 1.2985, "step": 646 }, { "epoch": 0.05673657802033079, "grad_norm": 0.107421875, "learning_rate": 0.001941, "loss": 1.4031, "step": 647 }, { "epoch": 0.05682426979470533, "grad_norm": 0.1123046875, "learning_rate": 0.0019440000000000002, "loss": 1.2429, "step": 648 }, { "epoch": 0.05691196156907988, "grad_norm": 0.08154296875, "learning_rate": 0.0019470000000000002, "loss": 1.3375, "step": 649 }, { "epoch": 0.05699965334345443, "grad_norm": 0.12451171875, "learning_rate": 0.0019500000000000001, "loss": 1.3173, "step": 650 }, { "epoch": 0.05708734511782897, "grad_norm": 0.1171875, "learning_rate": 0.001953, "loss": 1.3144, "step": 651 }, { "epoch": 0.05717503689220352, "grad_norm": 0.12353515625, "learning_rate": 0.0019560000000000003, "loss": 1.4079, "step": 652 }, { "epoch": 0.05726272866657806, "grad_norm": 0.134765625, "learning_rate": 0.0019590000000000002, "loss": 1.4005, "step": 653 }, { "epoch": 0.05735042044095261, "grad_norm": 0.142578125, "learning_rate": 0.001962, "loss": 1.4134, "step": 654 }, { "epoch": 0.05743811221532715, "grad_norm": 0.169921875, "learning_rate": 0.001965, "loss": 1.3191, "step": 655 }, { "epoch": 0.057525803989701696, "grad_norm": 0.1220703125, "learning_rate": 0.001968, "loss": 1.3546, "step": 656 }, { "epoch": 0.05761349576407624, "grad_norm": 0.1513671875, "learning_rate": 0.001971, "loss": 1.3522, "step": 657 }, { "epoch": 0.057701187538450785, "grad_norm": 0.1484375, "learning_rate": 0.001974, "loss": 1.318, "step": 658 }, { "epoch": 0.057788879312825336, "grad_norm": 0.1103515625, "learning_rate": 0.001977, "loss": 1.3126, "step": 659 }, { "epoch": 0.05787657108719988, "grad_norm": 0.0947265625, "learning_rate": 0.00198, "loss": 1.3112, "step": 660 }, { "epoch": 0.057964262861574425, "grad_norm": 0.1044921875, "learning_rate": 0.001983, "loss": 1.3347, "step": 661 }, { "epoch": 0.05805195463594897, "grad_norm": 0.125, "learning_rate": 0.0019860000000000004, "loss": 1.384, "step": 662 }, { "epoch": 0.058139646410323514, "grad_norm": 0.11279296875, "learning_rate": 0.0019890000000000003, "loss": 1.3456, "step": 663 }, { "epoch": 0.05822733818469806, "grad_norm": 0.1220703125, "learning_rate": 0.0019920000000000003, "loss": 1.3589, "step": 664 }, { "epoch": 0.0583150299590726, "grad_norm": 0.1025390625, "learning_rate": 0.0019950000000000002, "loss": 1.3744, "step": 665 }, { "epoch": 0.05840272173344715, "grad_norm": 0.1474609375, "learning_rate": 0.001998, "loss": 1.3614, "step": 666 }, { "epoch": 0.05849041350782169, "grad_norm": 0.10400390625, "learning_rate": 0.002001, "loss": 1.3101, "step": 667 }, { "epoch": 0.058578105282196244, "grad_norm": 0.11767578125, "learning_rate": 0.002004, "loss": 1.3496, "step": 668 }, { "epoch": 0.05866579705657079, "grad_norm": 0.09716796875, "learning_rate": 0.002007, "loss": 1.3225, "step": 669 }, { "epoch": 0.05875348883094533, "grad_norm": 0.1318359375, "learning_rate": 0.00201, "loss": 1.3426, "step": 670 }, { "epoch": 0.05884118060531988, "grad_norm": 0.09521484375, "learning_rate": 0.002013, "loss": 1.2791, "step": 671 }, { "epoch": 0.05892887237969442, "grad_norm": 0.11279296875, "learning_rate": 0.002016, "loss": 1.2956, "step": 672 }, { "epoch": 0.059016564154068966, "grad_norm": 0.10400390625, "learning_rate": 0.002019, "loss": 1.2956, "step": 673 }, { "epoch": 0.05910425592844351, "grad_norm": 0.09228515625, "learning_rate": 0.0020220000000000004, "loss": 1.3212, "step": 674 }, { "epoch": 0.059191947702818055, "grad_norm": 0.10498046875, "learning_rate": 0.0020250000000000003, "loss": 1.3192, "step": 675 }, { "epoch": 0.0592796394771926, "grad_norm": 0.07958984375, "learning_rate": 0.0020280000000000003, "loss": 1.373, "step": 676 }, { "epoch": 0.059367331251567144, "grad_norm": 0.09423828125, "learning_rate": 0.0020310000000000003, "loss": 1.4266, "step": 677 }, { "epoch": 0.059455023025941696, "grad_norm": 0.08447265625, "learning_rate": 0.0020340000000000002, "loss": 1.3394, "step": 678 }, { "epoch": 0.05954271480031624, "grad_norm": 0.10400390625, "learning_rate": 0.002037, "loss": 1.3182, "step": 679 }, { "epoch": 0.059630406574690785, "grad_norm": 0.11328125, "learning_rate": 0.00204, "loss": 1.3112, "step": 680 }, { "epoch": 0.05971809834906533, "grad_norm": 0.08740234375, "learning_rate": 0.002043, "loss": 1.2881, "step": 681 }, { "epoch": 0.059805790123439874, "grad_norm": 0.1279296875, "learning_rate": 0.002046, "loss": 1.3234, "step": 682 }, { "epoch": 0.05989348189781442, "grad_norm": 0.1220703125, "learning_rate": 0.002049, "loss": 1.3557, "step": 683 }, { "epoch": 0.05998117367218896, "grad_norm": 0.138671875, "learning_rate": 0.002052, "loss": 1.3126, "step": 684 }, { "epoch": 0.06006886544656351, "grad_norm": 0.1259765625, "learning_rate": 0.0020550000000000004, "loss": 1.3457, "step": 685 }, { "epoch": 0.06015655722093805, "grad_norm": 0.0986328125, "learning_rate": 0.0020580000000000004, "loss": 1.295, "step": 686 }, { "epoch": 0.0602442489953126, "grad_norm": 0.115234375, "learning_rate": 0.0020610000000000003, "loss": 1.3832, "step": 687 }, { "epoch": 0.06033194076968715, "grad_norm": 0.130859375, "learning_rate": 0.002064, "loss": 1.338, "step": 688 }, { "epoch": 0.06041963254406169, "grad_norm": 0.08984375, "learning_rate": 0.002067, "loss": 1.3339, "step": 689 }, { "epoch": 0.06050732431843624, "grad_norm": 0.138671875, "learning_rate": 0.00207, "loss": 1.3436, "step": 690 }, { "epoch": 0.06059501609281078, "grad_norm": 0.09423828125, "learning_rate": 0.0020729999999999998, "loss": 1.3324, "step": 691 }, { "epoch": 0.060682707867185326, "grad_norm": 0.1435546875, "learning_rate": 0.0020759999999999997, "loss": 1.3587, "step": 692 }, { "epoch": 0.06077039964155987, "grad_norm": 0.15234375, "learning_rate": 0.0020789999999999997, "loss": 1.3409, "step": 693 }, { "epoch": 0.060858091415934415, "grad_norm": 0.09716796875, "learning_rate": 0.002082, "loss": 1.358, "step": 694 }, { "epoch": 0.06094578319030896, "grad_norm": 0.11767578125, "learning_rate": 0.002085, "loss": 1.2721, "step": 695 }, { "epoch": 0.06103347496468351, "grad_norm": 0.1533203125, "learning_rate": 0.002088, "loss": 1.3178, "step": 696 }, { "epoch": 0.061121166739058055, "grad_norm": 0.1494140625, "learning_rate": 0.002091, "loss": 1.2921, "step": 697 }, { "epoch": 0.0612088585134326, "grad_norm": 0.07958984375, "learning_rate": 0.002094, "loss": 1.2661, "step": 698 }, { "epoch": 0.061296550287807144, "grad_norm": 0.1259765625, "learning_rate": 0.002097, "loss": 1.3789, "step": 699 }, { "epoch": 0.06138424206218169, "grad_norm": 0.0927734375, "learning_rate": 0.0021, "loss": 1.3957, "step": 700 }, { "epoch": 0.06147193383655623, "grad_norm": 0.125, "learning_rate": 0.002103, "loss": 1.348, "step": 701 }, { "epoch": 0.06155962561093078, "grad_norm": 0.103515625, "learning_rate": 0.002106, "loss": 1.3111, "step": 702 }, { "epoch": 0.06164731738530532, "grad_norm": 0.08251953125, "learning_rate": 0.0021089999999999998, "loss": 1.321, "step": 703 }, { "epoch": 0.06173500915967987, "grad_norm": 0.09814453125, "learning_rate": 0.0021119999999999997, "loss": 1.4042, "step": 704 }, { "epoch": 0.06182270093405442, "grad_norm": 0.12890625, "learning_rate": 0.002115, "loss": 1.2891, "step": 705 }, { "epoch": 0.06191039270842896, "grad_norm": 0.125, "learning_rate": 0.002118, "loss": 1.2994, "step": 706 }, { "epoch": 0.06199808448280351, "grad_norm": 0.1640625, "learning_rate": 0.002121, "loss": 1.4005, "step": 707 }, { "epoch": 0.06208577625717805, "grad_norm": 0.109375, "learning_rate": 0.002124, "loss": 1.3539, "step": 708 }, { "epoch": 0.062173468031552596, "grad_norm": 0.1513671875, "learning_rate": 0.002127, "loss": 1.3263, "step": 709 }, { "epoch": 0.06226115980592714, "grad_norm": 0.083984375, "learning_rate": 0.00213, "loss": 1.3272, "step": 710 }, { "epoch": 0.062348851580301685, "grad_norm": 0.11865234375, "learning_rate": 0.002133, "loss": 1.3914, "step": 711 }, { "epoch": 0.06243654335467623, "grad_norm": 0.0966796875, "learning_rate": 0.002136, "loss": 1.3741, "step": 712 }, { "epoch": 0.06252423512905078, "grad_norm": 0.1298828125, "learning_rate": 0.002139, "loss": 1.3691, "step": 713 }, { "epoch": 0.06261192690342532, "grad_norm": 0.08984375, "learning_rate": 0.002142, "loss": 1.3387, "step": 714 }, { "epoch": 0.06269961867779987, "grad_norm": 0.095703125, "learning_rate": 0.0021449999999999998, "loss": 1.3177, "step": 715 }, { "epoch": 0.06278731045217441, "grad_norm": 0.08447265625, "learning_rate": 0.002148, "loss": 1.3291, "step": 716 }, { "epoch": 0.06287500222654896, "grad_norm": 0.115234375, "learning_rate": 0.002151, "loss": 1.3442, "step": 717 }, { "epoch": 0.06296269400092351, "grad_norm": 0.08154296875, "learning_rate": 0.002154, "loss": 1.352, "step": 718 }, { "epoch": 0.06305038577529805, "grad_norm": 0.12255859375, "learning_rate": 0.002157, "loss": 1.3209, "step": 719 }, { "epoch": 0.0631380775496726, "grad_norm": 0.080078125, "learning_rate": 0.00216, "loss": 1.3168, "step": 720 }, { "epoch": 0.06322576932404714, "grad_norm": 0.12060546875, "learning_rate": 0.002163, "loss": 1.3624, "step": 721 }, { "epoch": 0.06331346109842169, "grad_norm": 0.09619140625, "learning_rate": 0.002166, "loss": 1.3289, "step": 722 }, { "epoch": 0.06340115287279623, "grad_norm": 0.1416015625, "learning_rate": 0.002169, "loss": 1.3062, "step": 723 }, { "epoch": 0.06348884464717078, "grad_norm": 0.09912109375, "learning_rate": 0.002172, "loss": 1.2927, "step": 724 }, { "epoch": 0.06357653642154532, "grad_norm": 0.126953125, "learning_rate": 0.002175, "loss": 1.3349, "step": 725 }, { "epoch": 0.06366422819591987, "grad_norm": 0.162109375, "learning_rate": 0.002178, "loss": 1.4218, "step": 726 }, { "epoch": 0.0637519199702944, "grad_norm": 0.23828125, "learning_rate": 0.0021809999999999998, "loss": 1.4004, "step": 727 }, { "epoch": 0.06383961174466896, "grad_norm": 0.1533203125, "learning_rate": 0.002184, "loss": 1.3236, "step": 728 }, { "epoch": 0.06392730351904351, "grad_norm": 0.1640625, "learning_rate": 0.002187, "loss": 1.2873, "step": 729 }, { "epoch": 0.06401499529341804, "grad_norm": 0.10400390625, "learning_rate": 0.00219, "loss": 1.3388, "step": 730 }, { "epoch": 0.0641026870677926, "grad_norm": 0.16796875, "learning_rate": 0.002193, "loss": 1.3179, "step": 731 }, { "epoch": 0.06419037884216713, "grad_norm": 0.1318359375, "learning_rate": 0.002196, "loss": 1.3483, "step": 732 }, { "epoch": 0.06427807061654169, "grad_norm": 0.1357421875, "learning_rate": 0.002199, "loss": 1.3194, "step": 733 }, { "epoch": 0.06436576239091622, "grad_norm": 0.1318359375, "learning_rate": 0.002202, "loss": 1.3404, "step": 734 }, { "epoch": 0.06445345416529077, "grad_norm": 0.125, "learning_rate": 0.002205, "loss": 1.2977, "step": 735 }, { "epoch": 0.06454114593966531, "grad_norm": 0.1083984375, "learning_rate": 0.002208, "loss": 1.362, "step": 736 }, { "epoch": 0.06462883771403986, "grad_norm": 0.09423828125, "learning_rate": 0.002211, "loss": 1.3174, "step": 737 }, { "epoch": 0.06471652948841441, "grad_norm": 0.1435546875, "learning_rate": 0.002214, "loss": 1.3242, "step": 738 }, { "epoch": 0.06480422126278895, "grad_norm": 0.10888671875, "learning_rate": 0.0022170000000000002, "loss": 1.296, "step": 739 }, { "epoch": 0.0648919130371635, "grad_norm": 0.08837890625, "learning_rate": 0.00222, "loss": 1.3848, "step": 740 }, { "epoch": 0.06497960481153804, "grad_norm": 0.10693359375, "learning_rate": 0.002223, "loss": 1.3586, "step": 741 }, { "epoch": 0.06506729658591259, "grad_norm": 0.09912109375, "learning_rate": 0.002226, "loss": 1.2866, "step": 742 }, { "epoch": 0.06515498836028713, "grad_norm": 0.1044921875, "learning_rate": 0.002229, "loss": 1.2886, "step": 743 }, { "epoch": 0.06524268013466168, "grad_norm": 0.10009765625, "learning_rate": 0.002232, "loss": 1.3764, "step": 744 }, { "epoch": 0.06533037190903622, "grad_norm": 0.08837890625, "learning_rate": 0.002235, "loss": 1.3943, "step": 745 }, { "epoch": 0.06541806368341077, "grad_norm": 0.12890625, "learning_rate": 0.002238, "loss": 1.3645, "step": 746 }, { "epoch": 0.06550575545778532, "grad_norm": 0.0869140625, "learning_rate": 0.002241, "loss": 1.2752, "step": 747 }, { "epoch": 0.06559344723215986, "grad_norm": 0.11474609375, "learning_rate": 0.002244, "loss": 1.2695, "step": 748 }, { "epoch": 0.06568113900653441, "grad_norm": 0.1357421875, "learning_rate": 0.002247, "loss": 1.3008, "step": 749 }, { "epoch": 0.06576883078090895, "grad_norm": 0.126953125, "learning_rate": 0.0022500000000000003, "loss": 1.2855, "step": 750 }, { "epoch": 0.0658565225552835, "grad_norm": 0.09033203125, "learning_rate": 0.0022530000000000002, "loss": 1.3534, "step": 751 }, { "epoch": 0.06594421432965804, "grad_norm": 0.08642578125, "learning_rate": 0.002256, "loss": 1.3676, "step": 752 }, { "epoch": 0.06603190610403259, "grad_norm": 0.1279296875, "learning_rate": 0.002259, "loss": 1.2932, "step": 753 }, { "epoch": 0.06611959787840713, "grad_norm": 0.1298828125, "learning_rate": 0.002262, "loss": 1.3035, "step": 754 }, { "epoch": 0.06620728965278168, "grad_norm": 0.103515625, "learning_rate": 0.002265, "loss": 1.2848, "step": 755 }, { "epoch": 0.06629498142715623, "grad_norm": 0.126953125, "learning_rate": 0.002268, "loss": 1.2856, "step": 756 }, { "epoch": 0.06638267320153077, "grad_norm": 0.109375, "learning_rate": 0.002271, "loss": 1.3616, "step": 757 }, { "epoch": 0.06647036497590532, "grad_norm": 0.1259765625, "learning_rate": 0.002274, "loss": 1.3047, "step": 758 }, { "epoch": 0.06655805675027986, "grad_norm": 0.1689453125, "learning_rate": 0.002277, "loss": 1.4104, "step": 759 }, { "epoch": 0.06664574852465441, "grad_norm": 0.11865234375, "learning_rate": 0.00228, "loss": 1.2926, "step": 760 }, { "epoch": 0.06673344029902895, "grad_norm": 0.0986328125, "learning_rate": 0.002283, "loss": 1.3173, "step": 761 }, { "epoch": 0.0668211320734035, "grad_norm": 0.1796875, "learning_rate": 0.0022860000000000003, "loss": 1.3172, "step": 762 }, { "epoch": 0.06690882384777803, "grad_norm": 0.1201171875, "learning_rate": 0.0022890000000000002, "loss": 1.3202, "step": 763 }, { "epoch": 0.06699651562215259, "grad_norm": 0.1435546875, "learning_rate": 0.002292, "loss": 1.3341, "step": 764 }, { "epoch": 0.06708420739652714, "grad_norm": 0.1767578125, "learning_rate": 0.002295, "loss": 1.3316, "step": 765 }, { "epoch": 0.06717189917090167, "grad_norm": 0.09716796875, "learning_rate": 0.002298, "loss": 1.3074, "step": 766 }, { "epoch": 0.06725959094527623, "grad_norm": 0.26171875, "learning_rate": 0.002301, "loss": 1.3714, "step": 767 }, { "epoch": 0.06734728271965076, "grad_norm": 0.21484375, "learning_rate": 0.002304, "loss": 1.3505, "step": 768 }, { "epoch": 0.06743497449402532, "grad_norm": 0.091796875, "learning_rate": 0.002307, "loss": 1.2795, "step": 769 }, { "epoch": 0.06752266626839985, "grad_norm": 0.146484375, "learning_rate": 0.00231, "loss": 1.2834, "step": 770 }, { "epoch": 0.0676103580427744, "grad_norm": 0.09619140625, "learning_rate": 0.002313, "loss": 1.2901, "step": 771 }, { "epoch": 0.06769804981714894, "grad_norm": 0.11083984375, "learning_rate": 0.002316, "loss": 1.2928, "step": 772 }, { "epoch": 0.0677857415915235, "grad_norm": 0.0869140625, "learning_rate": 0.0023190000000000003, "loss": 1.3493, "step": 773 }, { "epoch": 0.06787343336589804, "grad_norm": 0.1044921875, "learning_rate": 0.0023220000000000003, "loss": 1.3889, "step": 774 }, { "epoch": 0.06796112514027258, "grad_norm": 0.10009765625, "learning_rate": 0.0023250000000000002, "loss": 1.2773, "step": 775 }, { "epoch": 0.06804881691464713, "grad_norm": 0.08740234375, "learning_rate": 0.002328, "loss": 1.3119, "step": 776 }, { "epoch": 0.06813650868902167, "grad_norm": 0.11376953125, "learning_rate": 0.002331, "loss": 1.3411, "step": 777 }, { "epoch": 0.06822420046339622, "grad_norm": 0.0771484375, "learning_rate": 0.002334, "loss": 1.3557, "step": 778 }, { "epoch": 0.06831189223777076, "grad_norm": 0.11669921875, "learning_rate": 0.002337, "loss": 1.3282, "step": 779 }, { "epoch": 0.06839958401214531, "grad_norm": 0.07666015625, "learning_rate": 0.00234, "loss": 1.3127, "step": 780 }, { "epoch": 0.06848727578651985, "grad_norm": 0.119140625, "learning_rate": 0.002343, "loss": 1.3505, "step": 781 }, { "epoch": 0.0685749675608944, "grad_norm": 0.08447265625, "learning_rate": 0.002346, "loss": 1.368, "step": 782 }, { "epoch": 0.06866265933526895, "grad_norm": 0.1572265625, "learning_rate": 0.002349, "loss": 1.3141, "step": 783 }, { "epoch": 0.06875035110964349, "grad_norm": 0.0830078125, "learning_rate": 0.002352, "loss": 1.3129, "step": 784 }, { "epoch": 0.06883804288401804, "grad_norm": 0.1240234375, "learning_rate": 0.0023550000000000003, "loss": 1.3506, "step": 785 }, { "epoch": 0.06892573465839258, "grad_norm": 0.0888671875, "learning_rate": 0.0023580000000000003, "loss": 1.3198, "step": 786 }, { "epoch": 0.06901342643276713, "grad_norm": 0.1162109375, "learning_rate": 0.0023610000000000003, "loss": 1.3936, "step": 787 }, { "epoch": 0.06910111820714167, "grad_norm": 0.09326171875, "learning_rate": 0.002364, "loss": 1.3044, "step": 788 }, { "epoch": 0.06918880998151622, "grad_norm": 0.13671875, "learning_rate": 0.002367, "loss": 1.3279, "step": 789 }, { "epoch": 0.06927650175589076, "grad_norm": 0.134765625, "learning_rate": 0.00237, "loss": 1.3941, "step": 790 }, { "epoch": 0.06936419353026531, "grad_norm": 0.08349609375, "learning_rate": 0.002373, "loss": 1.3437, "step": 791 }, { "epoch": 0.06945188530463986, "grad_norm": 0.0859375, "learning_rate": 0.002376, "loss": 1.4024, "step": 792 }, { "epoch": 0.0695395770790144, "grad_norm": 0.08642578125, "learning_rate": 0.002379, "loss": 1.3566, "step": 793 }, { "epoch": 0.06962726885338895, "grad_norm": 0.09814453125, "learning_rate": 0.002382, "loss": 1.2291, "step": 794 }, { "epoch": 0.06971496062776349, "grad_norm": 0.0849609375, "learning_rate": 0.002385, "loss": 1.3785, "step": 795 }, { "epoch": 0.06980265240213804, "grad_norm": 0.1103515625, "learning_rate": 0.0023880000000000004, "loss": 1.3573, "step": 796 }, { "epoch": 0.06989034417651258, "grad_norm": 0.140625, "learning_rate": 0.0023910000000000003, "loss": 1.3114, "step": 797 }, { "epoch": 0.06997803595088713, "grad_norm": 0.09228515625, "learning_rate": 0.0023940000000000003, "loss": 1.3111, "step": 798 }, { "epoch": 0.07006572772526166, "grad_norm": 0.1728515625, "learning_rate": 0.0023970000000000003, "loss": 1.416, "step": 799 }, { "epoch": 0.07015341949963622, "grad_norm": 0.181640625, "learning_rate": 0.0024000000000000002, "loss": 1.2953, "step": 800 }, { "epoch": 0.07024111127401077, "grad_norm": 0.150390625, "learning_rate": 0.002403, "loss": 1.3164, "step": 801 }, { "epoch": 0.0703288030483853, "grad_norm": 0.205078125, "learning_rate": 0.002406, "loss": 1.44, "step": 802 }, { "epoch": 0.07041649482275986, "grad_norm": 0.12158203125, "learning_rate": 0.002409, "loss": 1.3285, "step": 803 }, { "epoch": 0.0705041865971344, "grad_norm": 0.11962890625, "learning_rate": 0.002412, "loss": 1.2978, "step": 804 }, { "epoch": 0.07059187837150895, "grad_norm": 0.10791015625, "learning_rate": 0.002415, "loss": 1.3385, "step": 805 }, { "epoch": 0.07067957014588348, "grad_norm": 0.16796875, "learning_rate": 0.002418, "loss": 1.2502, "step": 806 }, { "epoch": 0.07076726192025803, "grad_norm": 0.09619140625, "learning_rate": 0.0024210000000000004, "loss": 1.2764, "step": 807 }, { "epoch": 0.07085495369463257, "grad_norm": 0.1884765625, "learning_rate": 0.0024240000000000004, "loss": 1.306, "step": 808 }, { "epoch": 0.07094264546900712, "grad_norm": 0.103515625, "learning_rate": 0.0024270000000000003, "loss": 1.2523, "step": 809 }, { "epoch": 0.07103033724338167, "grad_norm": 0.251953125, "learning_rate": 0.0024300000000000003, "loss": 1.3436, "step": 810 }, { "epoch": 0.07111802901775621, "grad_norm": 0.076171875, "learning_rate": 0.0024330000000000003, "loss": 1.329, "step": 811 }, { "epoch": 0.07120572079213076, "grad_norm": 0.2578125, "learning_rate": 0.0024360000000000002, "loss": 1.3534, "step": 812 }, { "epoch": 0.0712934125665053, "grad_norm": 0.0869140625, "learning_rate": 0.0024389999999999998, "loss": 1.2848, "step": 813 }, { "epoch": 0.07138110434087985, "grad_norm": 0.1787109375, "learning_rate": 0.0024419999999999997, "loss": 1.3431, "step": 814 }, { "epoch": 0.07146879611525439, "grad_norm": 0.09716796875, "learning_rate": 0.0024449999999999997, "loss": 1.2798, "step": 815 }, { "epoch": 0.07155648788962894, "grad_norm": 0.1640625, "learning_rate": 0.002448, "loss": 1.3048, "step": 816 }, { "epoch": 0.07164417966400348, "grad_norm": 0.11865234375, "learning_rate": 0.002451, "loss": 1.2944, "step": 817 }, { "epoch": 0.07173187143837803, "grad_norm": 0.296875, "learning_rate": 0.002454, "loss": 1.4416, "step": 818 }, { "epoch": 0.07181956321275258, "grad_norm": 0.1767578125, "learning_rate": 0.002457, "loss": 1.3192, "step": 819 }, { "epoch": 0.07190725498712712, "grad_norm": 0.2138671875, "learning_rate": 0.00246, "loss": 1.3287, "step": 820 }, { "epoch": 0.07199494676150167, "grad_norm": 0.1591796875, "learning_rate": 0.002463, "loss": 1.3128, "step": 821 }, { "epoch": 0.07208263853587621, "grad_norm": 0.162109375, "learning_rate": 0.002466, "loss": 1.3772, "step": 822 }, { "epoch": 0.07217033031025076, "grad_norm": 0.140625, "learning_rate": 0.002469, "loss": 1.3326, "step": 823 }, { "epoch": 0.0722580220846253, "grad_norm": 0.16796875, "learning_rate": 0.002472, "loss": 1.3595, "step": 824 }, { "epoch": 0.07234571385899985, "grad_norm": 0.12060546875, "learning_rate": 0.0024749999999999998, "loss": 1.3271, "step": 825 }, { "epoch": 0.07243340563337439, "grad_norm": 0.169921875, "learning_rate": 0.0024779999999999997, "loss": 1.3399, "step": 826 }, { "epoch": 0.07252109740774894, "grad_norm": 0.142578125, "learning_rate": 0.002481, "loss": 1.3322, "step": 827 }, { "epoch": 0.07260878918212349, "grad_norm": 0.20703125, "learning_rate": 0.002484, "loss": 1.3868, "step": 828 }, { "epoch": 0.07269648095649803, "grad_norm": 0.1767578125, "learning_rate": 0.002487, "loss": 1.3419, "step": 829 }, { "epoch": 0.07278417273087258, "grad_norm": 0.1865234375, "learning_rate": 0.00249, "loss": 1.3186, "step": 830 }, { "epoch": 0.07287186450524712, "grad_norm": 0.1640625, "learning_rate": 0.002493, "loss": 1.2891, "step": 831 }, { "epoch": 0.07295955627962167, "grad_norm": 0.154296875, "learning_rate": 0.002496, "loss": 1.3262, "step": 832 }, { "epoch": 0.0730472480539962, "grad_norm": 0.12451171875, "learning_rate": 0.002499, "loss": 1.3283, "step": 833 }, { "epoch": 0.07313493982837076, "grad_norm": 0.10009765625, "learning_rate": 0.002502, "loss": 1.3316, "step": 834 }, { "epoch": 0.0732226316027453, "grad_norm": 0.0888671875, "learning_rate": 0.002505, "loss": 1.324, "step": 835 }, { "epoch": 0.07331032337711985, "grad_norm": 0.08984375, "learning_rate": 0.002508, "loss": 1.3424, "step": 836 }, { "epoch": 0.0733980151514944, "grad_norm": 0.09765625, "learning_rate": 0.0025109999999999998, "loss": 1.3264, "step": 837 }, { "epoch": 0.07348570692586893, "grad_norm": 0.08740234375, "learning_rate": 0.0025139999999999997, "loss": 1.3647, "step": 838 }, { "epoch": 0.07357339870024349, "grad_norm": 0.10693359375, "learning_rate": 0.002517, "loss": 1.3417, "step": 839 }, { "epoch": 0.07366109047461802, "grad_norm": 0.10888671875, "learning_rate": 0.00252, "loss": 1.2623, "step": 840 }, { "epoch": 0.07374878224899258, "grad_norm": 0.142578125, "learning_rate": 0.002523, "loss": 1.3707, "step": 841 }, { "epoch": 0.07383647402336711, "grad_norm": 0.10400390625, "learning_rate": 0.002526, "loss": 1.398, "step": 842 }, { "epoch": 0.07392416579774166, "grad_norm": 0.09912109375, "learning_rate": 0.002529, "loss": 1.319, "step": 843 }, { "epoch": 0.0740118575721162, "grad_norm": 0.11962890625, "learning_rate": 0.002532, "loss": 1.324, "step": 844 }, { "epoch": 0.07409954934649075, "grad_norm": 0.16015625, "learning_rate": 0.002535, "loss": 1.2863, "step": 845 }, { "epoch": 0.0741872411208653, "grad_norm": 0.1396484375, "learning_rate": 0.002538, "loss": 1.3849, "step": 846 }, { "epoch": 0.07427493289523984, "grad_norm": 0.12353515625, "learning_rate": 0.002541, "loss": 1.3583, "step": 847 }, { "epoch": 0.0743626246696144, "grad_norm": 0.1396484375, "learning_rate": 0.002544, "loss": 1.383, "step": 848 }, { "epoch": 0.07445031644398893, "grad_norm": 0.1298828125, "learning_rate": 0.002547, "loss": 1.2858, "step": 849 }, { "epoch": 0.07453800821836348, "grad_norm": 0.1904296875, "learning_rate": 0.00255, "loss": 1.3304, "step": 850 }, { "epoch": 0.07462569999273802, "grad_norm": 0.1796875, "learning_rate": 0.002553, "loss": 1.4514, "step": 851 }, { "epoch": 0.07471339176711257, "grad_norm": 0.177734375, "learning_rate": 0.002556, "loss": 1.3373, "step": 852 }, { "epoch": 0.07480108354148711, "grad_norm": 0.1875, "learning_rate": 0.002559, "loss": 1.4162, "step": 853 }, { "epoch": 0.07488877531586166, "grad_norm": 0.1484375, "learning_rate": 0.002562, "loss": 1.3607, "step": 854 }, { "epoch": 0.07497646709023621, "grad_norm": 0.1435546875, "learning_rate": 0.002565, "loss": 1.3378, "step": 855 }, { "epoch": 0.07506415886461075, "grad_norm": 0.1435546875, "learning_rate": 0.002568, "loss": 1.3712, "step": 856 }, { "epoch": 0.0751518506389853, "grad_norm": 0.111328125, "learning_rate": 0.002571, "loss": 1.3485, "step": 857 }, { "epoch": 0.07523954241335984, "grad_norm": 0.0869140625, "learning_rate": 0.002574, "loss": 1.3551, "step": 858 }, { "epoch": 0.07532723418773439, "grad_norm": 0.072265625, "learning_rate": 0.002577, "loss": 1.3312, "step": 859 }, { "epoch": 0.07541492596210893, "grad_norm": 0.09912109375, "learning_rate": 0.00258, "loss": 1.3454, "step": 860 }, { "epoch": 0.07550261773648348, "grad_norm": 0.103515625, "learning_rate": 0.0025830000000000002, "loss": 1.3705, "step": 861 }, { "epoch": 0.07559030951085802, "grad_norm": 0.11181640625, "learning_rate": 0.002586, "loss": 1.2941, "step": 862 }, { "epoch": 0.07567800128523257, "grad_norm": 0.0947265625, "learning_rate": 0.002589, "loss": 1.3392, "step": 863 }, { "epoch": 0.07576569305960712, "grad_norm": 0.1357421875, "learning_rate": 0.002592, "loss": 1.3258, "step": 864 }, { "epoch": 0.07585338483398166, "grad_norm": 0.07568359375, "learning_rate": 0.002595, "loss": 1.288, "step": 865 }, { "epoch": 0.07594107660835621, "grad_norm": 0.17578125, "learning_rate": 0.002598, "loss": 1.4316, "step": 866 }, { "epoch": 0.07602876838273075, "grad_norm": 0.130859375, "learning_rate": 0.002601, "loss": 1.3078, "step": 867 }, { "epoch": 0.0761164601571053, "grad_norm": 0.1552734375, "learning_rate": 0.002604, "loss": 1.2932, "step": 868 }, { "epoch": 0.07620415193147984, "grad_norm": 0.1669921875, "learning_rate": 0.002607, "loss": 1.3165, "step": 869 }, { "epoch": 0.07629184370585439, "grad_norm": 0.123046875, "learning_rate": 0.00261, "loss": 1.3193, "step": 870 }, { "epoch": 0.07637953548022892, "grad_norm": 0.09765625, "learning_rate": 0.002613, "loss": 1.3277, "step": 871 }, { "epoch": 0.07646722725460348, "grad_norm": 0.111328125, "learning_rate": 0.002616, "loss": 1.2847, "step": 872 }, { "epoch": 0.07655491902897803, "grad_norm": 0.12158203125, "learning_rate": 0.0026190000000000002, "loss": 1.335, "step": 873 }, { "epoch": 0.07664261080335256, "grad_norm": 0.09130859375, "learning_rate": 0.002622, "loss": 1.2934, "step": 874 }, { "epoch": 0.07673030257772712, "grad_norm": 0.11181640625, "learning_rate": 0.002625, "loss": 1.3118, "step": 875 }, { "epoch": 0.07681799435210165, "grad_norm": 0.08837890625, "learning_rate": 0.002628, "loss": 1.268, "step": 876 }, { "epoch": 0.0769056861264762, "grad_norm": 0.1630859375, "learning_rate": 0.002631, "loss": 1.3718, "step": 877 }, { "epoch": 0.07699337790085074, "grad_norm": 0.087890625, "learning_rate": 0.002634, "loss": 1.347, "step": 878 }, { "epoch": 0.0770810696752253, "grad_norm": 0.1025390625, "learning_rate": 0.002637, "loss": 1.321, "step": 879 }, { "epoch": 0.07716876144959983, "grad_norm": 0.10498046875, "learning_rate": 0.00264, "loss": 1.3698, "step": 880 }, { "epoch": 0.07725645322397438, "grad_norm": 0.1083984375, "learning_rate": 0.002643, "loss": 1.3034, "step": 881 }, { "epoch": 0.07734414499834893, "grad_norm": 0.08251953125, "learning_rate": 0.002646, "loss": 1.3008, "step": 882 }, { "epoch": 0.07743183677272347, "grad_norm": 0.107421875, "learning_rate": 0.002649, "loss": 1.3692, "step": 883 }, { "epoch": 0.07751952854709802, "grad_norm": 0.1044921875, "learning_rate": 0.0026520000000000003, "loss": 1.3591, "step": 884 }, { "epoch": 0.07760722032147256, "grad_norm": 0.0732421875, "learning_rate": 0.0026550000000000002, "loss": 1.2839, "step": 885 }, { "epoch": 0.07769491209584711, "grad_norm": 0.10107421875, "learning_rate": 0.002658, "loss": 1.3315, "step": 886 }, { "epoch": 0.07778260387022165, "grad_norm": 0.10595703125, "learning_rate": 0.002661, "loss": 1.306, "step": 887 }, { "epoch": 0.0778702956445962, "grad_norm": 0.103515625, "learning_rate": 0.002664, "loss": 1.3655, "step": 888 }, { "epoch": 0.07795798741897074, "grad_norm": 0.0908203125, "learning_rate": 0.002667, "loss": 1.3113, "step": 889 }, { "epoch": 0.07804567919334529, "grad_norm": 0.1884765625, "learning_rate": 0.00267, "loss": 1.3352, "step": 890 }, { "epoch": 0.07813337096771984, "grad_norm": 0.1689453125, "learning_rate": 0.002673, "loss": 1.2859, "step": 891 }, { "epoch": 0.07822106274209438, "grad_norm": 0.10205078125, "learning_rate": 0.002676, "loss": 1.3539, "step": 892 }, { "epoch": 0.07830875451646893, "grad_norm": 0.234375, "learning_rate": 0.002679, "loss": 1.3543, "step": 893 }, { "epoch": 0.07839644629084347, "grad_norm": 0.2080078125, "learning_rate": 0.002682, "loss": 1.3276, "step": 894 }, { "epoch": 0.07848413806521802, "grad_norm": 0.10302734375, "learning_rate": 0.0026850000000000003, "loss": 1.3294, "step": 895 }, { "epoch": 0.07857182983959256, "grad_norm": 0.111328125, "learning_rate": 0.0026880000000000003, "loss": 1.2468, "step": 896 }, { "epoch": 0.07865952161396711, "grad_norm": 0.1845703125, "learning_rate": 0.0026910000000000002, "loss": 1.3238, "step": 897 }, { "epoch": 0.07874721338834165, "grad_norm": 0.1767578125, "learning_rate": 0.002694, "loss": 1.3838, "step": 898 }, { "epoch": 0.0788349051627162, "grad_norm": 0.205078125, "learning_rate": 0.002697, "loss": 1.3141, "step": 899 }, { "epoch": 0.07892259693709075, "grad_norm": 0.1455078125, "learning_rate": 0.0027, "loss": 1.2963, "step": 900 }, { "epoch": 0.07901028871146529, "grad_norm": 0.15234375, "learning_rate": 0.002703, "loss": 1.256, "step": 901 }, { "epoch": 0.07909798048583984, "grad_norm": 0.1259765625, "learning_rate": 0.002706, "loss": 1.3802, "step": 902 }, { "epoch": 0.07918567226021438, "grad_norm": 0.10693359375, "learning_rate": 0.002709, "loss": 1.2523, "step": 903 }, { "epoch": 0.07927336403458893, "grad_norm": 0.11083984375, "learning_rate": 0.002712, "loss": 1.2966, "step": 904 }, { "epoch": 0.07936105580896347, "grad_norm": 0.1103515625, "learning_rate": 0.002715, "loss": 1.2996, "step": 905 }, { "epoch": 0.07944874758333802, "grad_norm": 0.10986328125, "learning_rate": 0.002718, "loss": 1.3653, "step": 906 }, { "epoch": 0.07953643935771255, "grad_norm": 0.1396484375, "learning_rate": 0.0027210000000000003, "loss": 1.3168, "step": 907 }, { "epoch": 0.0796241311320871, "grad_norm": 0.095703125, "learning_rate": 0.0027240000000000003, "loss": 1.4055, "step": 908 }, { "epoch": 0.07971182290646166, "grad_norm": 0.1357421875, "learning_rate": 0.0027270000000000003, "loss": 1.3193, "step": 909 }, { "epoch": 0.0797995146808362, "grad_norm": 0.09912109375, "learning_rate": 0.0027300000000000002, "loss": 1.3323, "step": 910 }, { "epoch": 0.07988720645521075, "grad_norm": 0.193359375, "learning_rate": 0.002733, "loss": 1.361, "step": 911 }, { "epoch": 0.07997489822958528, "grad_norm": 0.255859375, "learning_rate": 0.002736, "loss": 1.3238, "step": 912 }, { "epoch": 0.08006259000395984, "grad_norm": 0.1005859375, "learning_rate": 0.002739, "loss": 1.3205, "step": 913 }, { "epoch": 0.08015028177833437, "grad_norm": 0.111328125, "learning_rate": 0.002742, "loss": 1.2774, "step": 914 }, { "epoch": 0.08023797355270892, "grad_norm": 0.09521484375, "learning_rate": 0.002745, "loss": 1.3301, "step": 915 }, { "epoch": 0.08032566532708346, "grad_norm": 0.09423828125, "learning_rate": 0.002748, "loss": 1.3214, "step": 916 }, { "epoch": 0.08041335710145801, "grad_norm": 0.1416015625, "learning_rate": 0.002751, "loss": 1.3142, "step": 917 }, { "epoch": 0.08050104887583256, "grad_norm": 0.07470703125, "learning_rate": 0.0027540000000000004, "loss": 1.3766, "step": 918 }, { "epoch": 0.0805887406502071, "grad_norm": 0.11376953125, "learning_rate": 0.0027570000000000003, "loss": 1.2802, "step": 919 }, { "epoch": 0.08067643242458165, "grad_norm": 0.087890625, "learning_rate": 0.0027600000000000003, "loss": 1.3042, "step": 920 }, { "epoch": 0.08076412419895619, "grad_norm": 0.10986328125, "learning_rate": 0.0027630000000000003, "loss": 1.3302, "step": 921 }, { "epoch": 0.08085181597333074, "grad_norm": 0.1044921875, "learning_rate": 0.0027660000000000002, "loss": 1.3524, "step": 922 }, { "epoch": 0.08093950774770528, "grad_norm": 0.10888671875, "learning_rate": 0.002769, "loss": 1.229, "step": 923 }, { "epoch": 0.08102719952207983, "grad_norm": 0.10107421875, "learning_rate": 0.002772, "loss": 1.334, "step": 924 }, { "epoch": 0.08111489129645437, "grad_norm": 0.1318359375, "learning_rate": 0.002775, "loss": 1.3263, "step": 925 }, { "epoch": 0.08120258307082892, "grad_norm": 0.201171875, "learning_rate": 0.002778, "loss": 1.3455, "step": 926 }, { "epoch": 0.08129027484520347, "grad_norm": 0.0849609375, "learning_rate": 0.002781, "loss": 1.3536, "step": 927 }, { "epoch": 0.08137796661957801, "grad_norm": 0.11865234375, "learning_rate": 0.002784, "loss": 1.3306, "step": 928 }, { "epoch": 0.08146565839395256, "grad_norm": 0.0771484375, "learning_rate": 0.0027870000000000004, "loss": 1.3462, "step": 929 }, { "epoch": 0.0815533501683271, "grad_norm": 0.12890625, "learning_rate": 0.0027900000000000004, "loss": 1.2984, "step": 930 }, { "epoch": 0.08164104194270165, "grad_norm": 0.10205078125, "learning_rate": 0.0027930000000000003, "loss": 1.3302, "step": 931 }, { "epoch": 0.08172873371707619, "grad_norm": 0.1005859375, "learning_rate": 0.0027960000000000003, "loss": 1.356, "step": 932 }, { "epoch": 0.08181642549145074, "grad_norm": 0.091796875, "learning_rate": 0.0027990000000000003, "loss": 1.3102, "step": 933 }, { "epoch": 0.08190411726582528, "grad_norm": 0.10205078125, "learning_rate": 0.0028020000000000002, "loss": 1.4226, "step": 934 }, { "epoch": 0.08199180904019983, "grad_norm": 0.0869140625, "learning_rate": 0.002805, "loss": 1.3052, "step": 935 }, { "epoch": 0.08207950081457437, "grad_norm": 0.095703125, "learning_rate": 0.002808, "loss": 1.3552, "step": 936 }, { "epoch": 0.08216719258894892, "grad_norm": 0.07958984375, "learning_rate": 0.002811, "loss": 1.3334, "step": 937 }, { "epoch": 0.08225488436332347, "grad_norm": 0.15234375, "learning_rate": 0.002814, "loss": 1.3238, "step": 938 }, { "epoch": 0.082342576137698, "grad_norm": 0.08154296875, "learning_rate": 0.002817, "loss": 1.2994, "step": 939 }, { "epoch": 0.08243026791207256, "grad_norm": 0.12158203125, "learning_rate": 0.00282, "loss": 1.2673, "step": 940 }, { "epoch": 0.0825179596864471, "grad_norm": 0.126953125, "learning_rate": 0.002823, "loss": 1.2656, "step": 941 }, { "epoch": 0.08260565146082165, "grad_norm": 0.10009765625, "learning_rate": 0.002826, "loss": 1.3713, "step": 942 }, { "epoch": 0.08269334323519618, "grad_norm": 0.27734375, "learning_rate": 0.002829, "loss": 1.3689, "step": 943 }, { "epoch": 0.08278103500957074, "grad_norm": 0.1533203125, "learning_rate": 0.002832, "loss": 1.3078, "step": 944 }, { "epoch": 0.08286872678394527, "grad_norm": 0.201171875, "learning_rate": 0.002835, "loss": 1.3463, "step": 945 }, { "epoch": 0.08295641855831982, "grad_norm": 0.2109375, "learning_rate": 0.002838, "loss": 1.2728, "step": 946 }, { "epoch": 0.08304411033269438, "grad_norm": 0.10205078125, "learning_rate": 0.0028409999999999998, "loss": 1.2866, "step": 947 }, { "epoch": 0.08313180210706891, "grad_norm": 0.09375, "learning_rate": 0.0028439999999999997, "loss": 1.3144, "step": 948 }, { "epoch": 0.08321949388144347, "grad_norm": 0.10986328125, "learning_rate": 0.002847, "loss": 1.2829, "step": 949 }, { "epoch": 0.083307185655818, "grad_norm": 0.07763671875, "learning_rate": 0.00285, "loss": 1.3705, "step": 950 }, { "epoch": 0.08339487743019255, "grad_norm": 0.107421875, "learning_rate": 0.002853, "loss": 1.3597, "step": 951 }, { "epoch": 0.08348256920456709, "grad_norm": 0.10791015625, "learning_rate": 0.002856, "loss": 1.3528, "step": 952 }, { "epoch": 0.08357026097894164, "grad_norm": 0.126953125, "learning_rate": 0.002859, "loss": 1.3411, "step": 953 }, { "epoch": 0.08365795275331618, "grad_norm": 0.1376953125, "learning_rate": 0.002862, "loss": 1.3656, "step": 954 }, { "epoch": 0.08374564452769073, "grad_norm": 0.259765625, "learning_rate": 0.002865, "loss": 1.3907, "step": 955 }, { "epoch": 0.08383333630206528, "grad_norm": 0.10986328125, "learning_rate": 0.002868, "loss": 1.3499, "step": 956 }, { "epoch": 0.08392102807643982, "grad_norm": 0.13671875, "learning_rate": 0.002871, "loss": 1.326, "step": 957 }, { "epoch": 0.08400871985081437, "grad_norm": 0.11083984375, "learning_rate": 0.002874, "loss": 1.2958, "step": 958 }, { "epoch": 0.08409641162518891, "grad_norm": 0.087890625, "learning_rate": 0.002877, "loss": 1.2943, "step": 959 }, { "epoch": 0.08418410339956346, "grad_norm": 0.15625, "learning_rate": 0.0028799999999999997, "loss": 1.2968, "step": 960 }, { "epoch": 0.084271795173938, "grad_norm": 0.119140625, "learning_rate": 0.002883, "loss": 1.3424, "step": 961 }, { "epoch": 0.08435948694831255, "grad_norm": 0.130859375, "learning_rate": 0.002886, "loss": 1.326, "step": 962 }, { "epoch": 0.08444717872268709, "grad_norm": 0.1845703125, "learning_rate": 0.002889, "loss": 1.3498, "step": 963 }, { "epoch": 0.08453487049706164, "grad_norm": 0.1787109375, "learning_rate": 0.002892, "loss": 1.2791, "step": 964 }, { "epoch": 0.08462256227143619, "grad_norm": 0.0849609375, "learning_rate": 0.002895, "loss": 1.3582, "step": 965 }, { "epoch": 0.08471025404581073, "grad_norm": 0.1337890625, "learning_rate": 0.002898, "loss": 1.3123, "step": 966 }, { "epoch": 0.08479794582018528, "grad_norm": 0.1689453125, "learning_rate": 0.002901, "loss": 1.3389, "step": 967 }, { "epoch": 0.08488563759455982, "grad_norm": 0.111328125, "learning_rate": 0.002904, "loss": 1.3073, "step": 968 }, { "epoch": 0.08497332936893437, "grad_norm": 0.388671875, "learning_rate": 0.002907, "loss": 1.3919, "step": 969 }, { "epoch": 0.08506102114330891, "grad_norm": 0.255859375, "learning_rate": 0.00291, "loss": 1.3619, "step": 970 }, { "epoch": 0.08514871291768346, "grad_norm": 0.1748046875, "learning_rate": 0.002913, "loss": 1.2457, "step": 971 }, { "epoch": 0.085236404692058, "grad_norm": 0.140625, "learning_rate": 0.002916, "loss": 1.3531, "step": 972 }, { "epoch": 0.08532409646643255, "grad_norm": 0.19921875, "learning_rate": 0.002919, "loss": 1.3493, "step": 973 }, { "epoch": 0.0854117882408071, "grad_norm": 0.1865234375, "learning_rate": 0.002922, "loss": 1.3564, "step": 974 }, { "epoch": 0.08549948001518164, "grad_norm": 0.1572265625, "learning_rate": 0.002925, "loss": 1.3556, "step": 975 }, { "epoch": 0.08558717178955619, "grad_norm": 0.10791015625, "learning_rate": 0.002928, "loss": 1.3064, "step": 976 }, { "epoch": 0.08567486356393073, "grad_norm": 0.181640625, "learning_rate": 0.002931, "loss": 1.3248, "step": 977 }, { "epoch": 0.08576255533830528, "grad_norm": 0.09619140625, "learning_rate": 0.002934, "loss": 1.2805, "step": 978 }, { "epoch": 0.08585024711267981, "grad_norm": 0.138671875, "learning_rate": 0.002937, "loss": 1.3346, "step": 979 }, { "epoch": 0.08593793888705437, "grad_norm": 0.08740234375, "learning_rate": 0.00294, "loss": 1.2495, "step": 980 }, { "epoch": 0.0860256306614289, "grad_norm": 0.10693359375, "learning_rate": 0.002943, "loss": 1.3576, "step": 981 }, { "epoch": 0.08611332243580345, "grad_norm": 0.0751953125, "learning_rate": 0.002946, "loss": 1.2991, "step": 982 }, { "epoch": 0.086201014210178, "grad_norm": 0.08935546875, "learning_rate": 0.0029490000000000002, "loss": 1.4072, "step": 983 }, { "epoch": 0.08628870598455254, "grad_norm": 0.10546875, "learning_rate": 0.002952, "loss": 1.3707, "step": 984 }, { "epoch": 0.0863763977589271, "grad_norm": 0.095703125, "learning_rate": 0.002955, "loss": 1.3455, "step": 985 }, { "epoch": 0.08646408953330163, "grad_norm": 0.099609375, "learning_rate": 0.002958, "loss": 1.3252, "step": 986 }, { "epoch": 0.08655178130767618, "grad_norm": 0.08935546875, "learning_rate": 0.002961, "loss": 1.3285, "step": 987 }, { "epoch": 0.08663947308205072, "grad_norm": 0.1328125, "learning_rate": 0.002964, "loss": 1.4067, "step": 988 }, { "epoch": 0.08672716485642527, "grad_norm": 0.10009765625, "learning_rate": 0.002967, "loss": 1.321, "step": 989 }, { "epoch": 0.08681485663079981, "grad_norm": 0.111328125, "learning_rate": 0.00297, "loss": 1.3814, "step": 990 }, { "epoch": 0.08690254840517436, "grad_norm": 0.1953125, "learning_rate": 0.002973, "loss": 1.3265, "step": 991 }, { "epoch": 0.08699024017954891, "grad_norm": 0.1865234375, "learning_rate": 0.002976, "loss": 1.4072, "step": 992 }, { "epoch": 0.08707793195392345, "grad_norm": 0.10302734375, "learning_rate": 0.002979, "loss": 1.3561, "step": 993 }, { "epoch": 0.087165623728298, "grad_norm": 0.08349609375, "learning_rate": 0.002982, "loss": 1.3047, "step": 994 }, { "epoch": 0.08725331550267254, "grad_norm": 0.0888671875, "learning_rate": 0.0029850000000000002, "loss": 1.2992, "step": 995 }, { "epoch": 0.08734100727704709, "grad_norm": 0.11572265625, "learning_rate": 0.002988, "loss": 1.3093, "step": 996 }, { "epoch": 0.08742869905142163, "grad_norm": 0.08984375, "learning_rate": 0.002991, "loss": 1.3291, "step": 997 }, { "epoch": 0.08751639082579618, "grad_norm": 0.1484375, "learning_rate": 0.002994, "loss": 1.4177, "step": 998 }, { "epoch": 0.08760408260017072, "grad_norm": 0.1513671875, "learning_rate": 0.002997, "loss": 1.2768, "step": 999 }, { "epoch": 0.08769177437454527, "grad_norm": 0.10205078125, "learning_rate": 0.003, "loss": 1.3108, "step": 1000 }, { "epoch": 0.08769177437454527, "eval_loss": 1.3424164056777954, "eval_runtime": 429.1223, "eval_samples_per_second": 33.666, "eval_steps_per_second": 8.417, "step": 1000 }, { "epoch": 0.08777946614891982, "grad_norm": 0.1044921875, "learning_rate": 0.0029999999384417424, "loss": 1.3823, "step": 1001 }, { "epoch": 0.08786715792329436, "grad_norm": 0.16015625, "learning_rate": 0.0029999997537669756, "loss": 1.4031, "step": 1002 }, { "epoch": 0.08795484969766891, "grad_norm": 0.1845703125, "learning_rate": 0.002999999445975716, "loss": 1.3088, "step": 1003 }, { "epoch": 0.08804254147204345, "grad_norm": 0.10205078125, "learning_rate": 0.0029999990150679926, "loss": 1.3848, "step": 1004 }, { "epoch": 0.088130233246418, "grad_norm": 0.16015625, "learning_rate": 0.002999998461043843, "loss": 1.3547, "step": 1005 }, { "epoch": 0.08821792502079254, "grad_norm": 0.08642578125, "learning_rate": 0.0029999977839033198, "loss": 1.2681, "step": 1006 }, { "epoch": 0.08830561679516709, "grad_norm": 0.150390625, "learning_rate": 0.0029999969836464833, "loss": 1.3275, "step": 1007 }, { "epoch": 0.08839330856954163, "grad_norm": 0.11083984375, "learning_rate": 0.002999996060273407, "loss": 1.3565, "step": 1008 }, { "epoch": 0.08848100034391618, "grad_norm": 0.08837890625, "learning_rate": 0.0029999950137841744, "loss": 1.2566, "step": 1009 }, { "epoch": 0.08856869211829073, "grad_norm": 0.087890625, "learning_rate": 0.002999993844178882, "loss": 1.3053, "step": 1010 }, { "epoch": 0.08865638389266527, "grad_norm": 0.091796875, "learning_rate": 0.002999992551457636, "loss": 1.307, "step": 1011 }, { "epoch": 0.08874407566703982, "grad_norm": 0.140625, "learning_rate": 0.002999991135620554, "loss": 1.3178, "step": 1012 }, { "epoch": 0.08883176744141436, "grad_norm": 0.0810546875, "learning_rate": 0.0029999895966677658, "loss": 1.2969, "step": 1013 }, { "epoch": 0.0889194592157889, "grad_norm": 0.083984375, "learning_rate": 0.0029999879345994113, "loss": 1.378, "step": 1014 }, { "epoch": 0.08900715099016344, "grad_norm": 0.083984375, "learning_rate": 0.002999986149415642, "loss": 1.3091, "step": 1015 }, { "epoch": 0.089094842764538, "grad_norm": 0.130859375, "learning_rate": 0.002999984241116621, "loss": 1.3379, "step": 1016 }, { "epoch": 0.08918253453891253, "grad_norm": 0.283203125, "learning_rate": 0.0029999822097025223, "loss": 1.3691, "step": 1017 }, { "epoch": 0.08927022631328708, "grad_norm": 0.166015625, "learning_rate": 0.0029999800551735304, "loss": 1.3491, "step": 1018 }, { "epoch": 0.08935791808766164, "grad_norm": 0.1025390625, "learning_rate": 0.002999977777529843, "loss": 1.3031, "step": 1019 }, { "epoch": 0.08944560986203617, "grad_norm": 0.10791015625, "learning_rate": 0.002999975376771667, "loss": 1.3721, "step": 1020 }, { "epoch": 0.08953330163641073, "grad_norm": 0.08837890625, "learning_rate": 0.0029999728528992214, "loss": 1.3064, "step": 1021 }, { "epoch": 0.08962099341078526, "grad_norm": 0.111328125, "learning_rate": 0.002999970205912737, "loss": 1.3724, "step": 1022 }, { "epoch": 0.08970868518515981, "grad_norm": 0.189453125, "learning_rate": 0.002999967435812455, "loss": 1.3757, "step": 1023 }, { "epoch": 0.08979637695953435, "grad_norm": 0.26953125, "learning_rate": 0.0029999645425986265, "loss": 1.3566, "step": 1024 }, { "epoch": 0.0898840687339089, "grad_norm": 0.109375, "learning_rate": 0.0029999615262715183, "loss": 1.393, "step": 1025 }, { "epoch": 0.08997176050828344, "grad_norm": 0.302734375, "learning_rate": 0.0029999583868314025, "loss": 1.3272, "step": 1026 }, { "epoch": 0.09005945228265799, "grad_norm": 0.224609375, "learning_rate": 0.0029999551242785674, "loss": 1.3077, "step": 1027 }, { "epoch": 0.09014714405703254, "grad_norm": 0.1357421875, "learning_rate": 0.0029999517386133097, "loss": 1.2973, "step": 1028 }, { "epoch": 0.09023483583140708, "grad_norm": 0.291015625, "learning_rate": 0.0029999482298359386, "loss": 1.3414, "step": 1029 }, { "epoch": 0.09032252760578163, "grad_norm": 0.1396484375, "learning_rate": 0.002999944597946773, "loss": 1.2904, "step": 1030 }, { "epoch": 0.09041021938015617, "grad_norm": 0.2109375, "learning_rate": 0.0029999408429461456, "loss": 1.3405, "step": 1031 }, { "epoch": 0.09049791115453072, "grad_norm": 0.2158203125, "learning_rate": 0.0029999369648343976, "loss": 1.3186, "step": 1032 }, { "epoch": 0.09058560292890526, "grad_norm": 0.11474609375, "learning_rate": 0.0029999329636118837, "loss": 1.2944, "step": 1033 }, { "epoch": 0.09067329470327981, "grad_norm": 0.1630859375, "learning_rate": 0.0029999288392789686, "loss": 1.3552, "step": 1034 }, { "epoch": 0.09076098647765435, "grad_norm": 0.10791015625, "learning_rate": 0.002999924591836028, "loss": 1.3493, "step": 1035 }, { "epoch": 0.0908486782520289, "grad_norm": 0.193359375, "learning_rate": 0.002999920221283449, "loss": 1.3785, "step": 1036 }, { "epoch": 0.09093637002640345, "grad_norm": 0.1484375, "learning_rate": 0.002999915727621631, "loss": 1.341, "step": 1037 }, { "epoch": 0.09102406180077799, "grad_norm": 0.1474609375, "learning_rate": 0.0029999111108509834, "loss": 1.3182, "step": 1038 }, { "epoch": 0.09111175357515254, "grad_norm": 0.1572265625, "learning_rate": 0.0029999063709719278, "loss": 1.3108, "step": 1039 }, { "epoch": 0.09119944534952708, "grad_norm": 0.1376953125, "learning_rate": 0.002999901507984895, "loss": 1.3294, "step": 1040 }, { "epoch": 0.09128713712390163, "grad_norm": 0.1669921875, "learning_rate": 0.0029998965218903297, "loss": 1.2971, "step": 1041 }, { "epoch": 0.09137482889827617, "grad_norm": 0.10791015625, "learning_rate": 0.0029998914126886864, "loss": 1.3484, "step": 1042 }, { "epoch": 0.09146252067265072, "grad_norm": 0.1005859375, "learning_rate": 0.002999886180380431, "loss": 1.3318, "step": 1043 }, { "epoch": 0.09155021244702526, "grad_norm": 0.1171875, "learning_rate": 0.0029998808249660407, "loss": 1.2998, "step": 1044 }, { "epoch": 0.09163790422139981, "grad_norm": 0.10546875, "learning_rate": 0.0029998753464460038, "loss": 1.3707, "step": 1045 }, { "epoch": 0.09172559599577436, "grad_norm": 0.08837890625, "learning_rate": 0.0029998697448208205, "loss": 1.348, "step": 1046 }, { "epoch": 0.0918132877701489, "grad_norm": 0.171875, "learning_rate": 0.0029998640200910006, "loss": 1.3921, "step": 1047 }, { "epoch": 0.09190097954452345, "grad_norm": 0.2060546875, "learning_rate": 0.002999858172257067, "loss": 1.368, "step": 1048 }, { "epoch": 0.09198867131889799, "grad_norm": 0.09814453125, "learning_rate": 0.0029998522013195525, "loss": 1.3864, "step": 1049 }, { "epoch": 0.09207636309327254, "grad_norm": 0.1025390625, "learning_rate": 0.0029998461072790017, "loss": 1.3486, "step": 1050 }, { "epoch": 0.09216405486764707, "grad_norm": 0.08203125, "learning_rate": 0.002999839890135971, "loss": 1.3157, "step": 1051 }, { "epoch": 0.09225174664202163, "grad_norm": 0.072265625, "learning_rate": 0.0029998335498910263, "loss": 1.3175, "step": 1052 }, { "epoch": 0.09233943841639616, "grad_norm": 0.09375, "learning_rate": 0.002999827086544747, "loss": 1.4045, "step": 1053 }, { "epoch": 0.09242713019077071, "grad_norm": 0.09130859375, "learning_rate": 0.0029998205000977213, "loss": 1.3117, "step": 1054 }, { "epoch": 0.09251482196514527, "grad_norm": 0.08056640625, "learning_rate": 0.0029998137905505513, "loss": 1.3054, "step": 1055 }, { "epoch": 0.0926025137395198, "grad_norm": 0.09228515625, "learning_rate": 0.0029998069579038476, "loss": 1.342, "step": 1056 }, { "epoch": 0.09269020551389436, "grad_norm": 0.1083984375, "learning_rate": 0.0029998000021582345, "loss": 1.2806, "step": 1057 }, { "epoch": 0.09277789728826889, "grad_norm": 0.1337890625, "learning_rate": 0.002999792923314345, "loss": 1.3156, "step": 1058 }, { "epoch": 0.09286558906264344, "grad_norm": 0.0966796875, "learning_rate": 0.0029997857213728257, "loss": 1.3024, "step": 1059 }, { "epoch": 0.09295328083701798, "grad_norm": 0.103515625, "learning_rate": 0.0029997783963343332, "loss": 1.3808, "step": 1060 }, { "epoch": 0.09304097261139253, "grad_norm": 0.181640625, "learning_rate": 0.002999770948199535, "loss": 1.3611, "step": 1061 }, { "epoch": 0.09312866438576707, "grad_norm": 0.11572265625, "learning_rate": 0.002999763376969111, "loss": 1.3256, "step": 1062 }, { "epoch": 0.09321635616014162, "grad_norm": 0.12353515625, "learning_rate": 0.002999755682643751, "loss": 1.3321, "step": 1063 }, { "epoch": 0.09330404793451617, "grad_norm": 0.18359375, "learning_rate": 0.0029997478652241576, "loss": 1.372, "step": 1064 }, { "epoch": 0.09339173970889071, "grad_norm": 0.11865234375, "learning_rate": 0.002999739924711043, "loss": 1.3459, "step": 1065 }, { "epoch": 0.09347943148326526, "grad_norm": 0.1416015625, "learning_rate": 0.002999731861105132, "loss": 1.3769, "step": 1066 }, { "epoch": 0.0935671232576398, "grad_norm": 0.259765625, "learning_rate": 0.002999723674407159, "loss": 1.3364, "step": 1067 }, { "epoch": 0.09365481503201435, "grad_norm": 0.1416015625, "learning_rate": 0.0029997153646178715, "loss": 1.3105, "step": 1068 }, { "epoch": 0.09374250680638889, "grad_norm": 0.08251953125, "learning_rate": 0.0029997069317380274, "loss": 1.3385, "step": 1069 }, { "epoch": 0.09383019858076344, "grad_norm": 0.11962890625, "learning_rate": 0.002999698375768395, "loss": 1.3305, "step": 1070 }, { "epoch": 0.09391789035513798, "grad_norm": 0.08642578125, "learning_rate": 0.002999689696709755, "loss": 1.3452, "step": 1071 }, { "epoch": 0.09400558212951253, "grad_norm": 0.1640625, "learning_rate": 0.0029996808945628984, "loss": 1.3774, "step": 1072 }, { "epoch": 0.09409327390388708, "grad_norm": 0.2099609375, "learning_rate": 0.002999671969328629, "loss": 1.3165, "step": 1073 }, { "epoch": 0.09418096567826162, "grad_norm": 0.0673828125, "learning_rate": 0.00299966292100776, "loss": 1.3589, "step": 1074 }, { "epoch": 0.09426865745263617, "grad_norm": 0.26171875, "learning_rate": 0.0029996537496011166, "loss": 1.3527, "step": 1075 }, { "epoch": 0.09435634922701071, "grad_norm": 0.19921875, "learning_rate": 0.0029996444551095352, "loss": 1.3123, "step": 1076 }, { "epoch": 0.09444404100138526, "grad_norm": 0.1767578125, "learning_rate": 0.002999635037533864, "loss": 1.3793, "step": 1077 }, { "epoch": 0.0945317327757598, "grad_norm": 0.30078125, "learning_rate": 0.0029996254968749614, "loss": 1.3356, "step": 1078 }, { "epoch": 0.09461942455013435, "grad_norm": 0.115234375, "learning_rate": 0.002999615833133697, "loss": 1.36, "step": 1079 }, { "epoch": 0.09470711632450889, "grad_norm": 0.19921875, "learning_rate": 0.0029996060463109535, "loss": 1.2863, "step": 1080 }, { "epoch": 0.09479480809888344, "grad_norm": 0.1171875, "learning_rate": 0.002999596136407622, "loss": 1.3104, "step": 1081 }, { "epoch": 0.09488249987325799, "grad_norm": 0.1083984375, "learning_rate": 0.002999586103424607, "loss": 1.2665, "step": 1082 }, { "epoch": 0.09497019164763253, "grad_norm": 0.1416015625, "learning_rate": 0.0029995759473628227, "loss": 1.3407, "step": 1083 }, { "epoch": 0.09505788342200708, "grad_norm": 0.09716796875, "learning_rate": 0.0029995656682231964, "loss": 1.3338, "step": 1084 }, { "epoch": 0.09514557519638162, "grad_norm": 0.1484375, "learning_rate": 0.0029995552660066656, "loss": 1.3139, "step": 1085 }, { "epoch": 0.09523326697075617, "grad_norm": 0.11328125, "learning_rate": 0.0029995447407141777, "loss": 1.3548, "step": 1086 }, { "epoch": 0.0953209587451307, "grad_norm": 0.10986328125, "learning_rate": 0.0029995340923466935, "loss": 1.268, "step": 1087 }, { "epoch": 0.09540865051950526, "grad_norm": 0.08642578125, "learning_rate": 0.0029995233209051835, "loss": 1.3708, "step": 1088 }, { "epoch": 0.0954963422938798, "grad_norm": 0.158203125, "learning_rate": 0.002999512426390631, "loss": 1.2914, "step": 1089 }, { "epoch": 0.09558403406825434, "grad_norm": 0.07958984375, "learning_rate": 0.0029995014088040287, "loss": 1.3179, "step": 1090 }, { "epoch": 0.0956717258426289, "grad_norm": 0.16796875, "learning_rate": 0.0029994902681463815, "loss": 1.3853, "step": 1091 }, { "epoch": 0.09575941761700343, "grad_norm": 0.111328125, "learning_rate": 0.0029994790044187056, "loss": 1.2732, "step": 1092 }, { "epoch": 0.09584710939137799, "grad_norm": 0.0751953125, "learning_rate": 0.002999467617622028, "loss": 1.2913, "step": 1093 }, { "epoch": 0.09593480116575252, "grad_norm": 0.154296875, "learning_rate": 0.002999456107757388, "loss": 1.399, "step": 1094 }, { "epoch": 0.09602249294012707, "grad_norm": 0.1865234375, "learning_rate": 0.0029994444748258344, "loss": 1.364, "step": 1095 }, { "epoch": 0.09611018471450161, "grad_norm": 0.08203125, "learning_rate": 0.0029994327188284276, "loss": 1.363, "step": 1096 }, { "epoch": 0.09619787648887616, "grad_norm": 0.13671875, "learning_rate": 0.002999420839766241, "loss": 1.3517, "step": 1097 }, { "epoch": 0.0962855682632507, "grad_norm": 0.08203125, "learning_rate": 0.002999408837640357, "loss": 1.3214, "step": 1098 }, { "epoch": 0.09637326003762525, "grad_norm": 0.087890625, "learning_rate": 0.0029993967124518706, "loss": 1.2575, "step": 1099 }, { "epoch": 0.0964609518119998, "grad_norm": 0.1357421875, "learning_rate": 0.0029993844642018873, "loss": 1.2762, "step": 1100 }, { "epoch": 0.09654864358637434, "grad_norm": 0.09033203125, "learning_rate": 0.0029993720928915245, "loss": 1.3552, "step": 1101 }, { "epoch": 0.09663633536074889, "grad_norm": 0.10546875, "learning_rate": 0.0029993595985219105, "loss": 1.3962, "step": 1102 }, { "epoch": 0.09672402713512343, "grad_norm": 0.09033203125, "learning_rate": 0.0029993469810941837, "loss": 1.3563, "step": 1103 }, { "epoch": 0.09681171890949798, "grad_norm": 0.11865234375, "learning_rate": 0.0029993342406094965, "loss": 1.2809, "step": 1104 }, { "epoch": 0.09689941068387252, "grad_norm": 0.0947265625, "learning_rate": 0.002999321377069009, "loss": 1.3422, "step": 1105 }, { "epoch": 0.09698710245824707, "grad_norm": 0.173828125, "learning_rate": 0.0029993083904738954, "loss": 1.333, "step": 1106 }, { "epoch": 0.09707479423262161, "grad_norm": 0.166015625, "learning_rate": 0.00299929528082534, "loss": 1.2935, "step": 1107 }, { "epoch": 0.09716248600699616, "grad_norm": 0.1328125, "learning_rate": 0.0029992820481245385, "loss": 1.3219, "step": 1108 }, { "epoch": 0.09725017778137071, "grad_norm": 0.1083984375, "learning_rate": 0.0029992686923726963, "loss": 1.3029, "step": 1109 }, { "epoch": 0.09733786955574525, "grad_norm": 0.09814453125, "learning_rate": 0.0029992552135710334, "loss": 1.265, "step": 1110 }, { "epoch": 0.0974255613301198, "grad_norm": 0.12451171875, "learning_rate": 0.0029992416117207775, "loss": 1.3016, "step": 1111 }, { "epoch": 0.09751325310449434, "grad_norm": 0.08154296875, "learning_rate": 0.00299922788682317, "loss": 1.3026, "step": 1112 }, { "epoch": 0.09760094487886889, "grad_norm": 0.10888671875, "learning_rate": 0.0029992140388794626, "loss": 1.3753, "step": 1113 }, { "epoch": 0.09768863665324343, "grad_norm": 0.09375, "learning_rate": 0.0029992000678909173, "loss": 1.3631, "step": 1114 }, { "epoch": 0.09777632842761798, "grad_norm": 0.08935546875, "learning_rate": 0.002999185973858809, "loss": 1.3922, "step": 1115 }, { "epoch": 0.09786402020199252, "grad_norm": 0.08447265625, "learning_rate": 0.0029991717567844226, "loss": 1.2998, "step": 1116 }, { "epoch": 0.09795171197636707, "grad_norm": 0.10888671875, "learning_rate": 0.002999157416669055, "loss": 1.2946, "step": 1117 }, { "epoch": 0.09803940375074162, "grad_norm": 0.08203125, "learning_rate": 0.002999142953514014, "loss": 1.3215, "step": 1118 }, { "epoch": 0.09812709552511616, "grad_norm": 0.1064453125, "learning_rate": 0.002999128367320618, "loss": 1.407, "step": 1119 }, { "epoch": 0.09821478729949071, "grad_norm": 0.1298828125, "learning_rate": 0.002999113658090198, "loss": 1.3248, "step": 1120 }, { "epoch": 0.09830247907386525, "grad_norm": 0.08740234375, "learning_rate": 0.002999098825824095, "loss": 1.3874, "step": 1121 }, { "epoch": 0.0983901708482398, "grad_norm": 0.08984375, "learning_rate": 0.0029990838705236614, "loss": 1.3076, "step": 1122 }, { "epoch": 0.09847786262261433, "grad_norm": 0.08740234375, "learning_rate": 0.002999068792190262, "loss": 1.3557, "step": 1123 }, { "epoch": 0.09856555439698889, "grad_norm": 0.10986328125, "learning_rate": 0.0029990535908252713, "loss": 1.2909, "step": 1124 }, { "epoch": 0.09865324617136342, "grad_norm": 0.10546875, "learning_rate": 0.0029990382664300754, "loss": 1.2819, "step": 1125 }, { "epoch": 0.09874093794573797, "grad_norm": 0.140625, "learning_rate": 0.0029990228190060722, "loss": 1.3976, "step": 1126 }, { "epoch": 0.09882862972011253, "grad_norm": 0.11669921875, "learning_rate": 0.0029990072485546705, "loss": 1.3246, "step": 1127 }, { "epoch": 0.09891632149448706, "grad_norm": 0.1376953125, "learning_rate": 0.00299899155507729, "loss": 1.4496, "step": 1128 }, { "epoch": 0.09900401326886162, "grad_norm": 0.376953125, "learning_rate": 0.002998975738575362, "loss": 1.3328, "step": 1129 }, { "epoch": 0.09909170504323615, "grad_norm": 0.25390625, "learning_rate": 0.002998959799050329, "loss": 1.3249, "step": 1130 }, { "epoch": 0.0991793968176107, "grad_norm": 0.12060546875, "learning_rate": 0.0029989437365036454, "loss": 1.2959, "step": 1131 }, { "epoch": 0.09926708859198524, "grad_norm": 0.13671875, "learning_rate": 0.0029989275509367747, "loss": 1.3706, "step": 1132 }, { "epoch": 0.0993547803663598, "grad_norm": 0.09423828125, "learning_rate": 0.0029989112423511933, "loss": 1.312, "step": 1133 }, { "epoch": 0.09944247214073433, "grad_norm": 0.08740234375, "learning_rate": 0.0029988948107483896, "loss": 1.3564, "step": 1134 }, { "epoch": 0.09953016391510888, "grad_norm": 0.099609375, "learning_rate": 0.0029988782561298608, "loss": 1.3321, "step": 1135 }, { "epoch": 0.09961785568948343, "grad_norm": 0.1171875, "learning_rate": 0.002998861578497117, "loss": 1.3139, "step": 1136 }, { "epoch": 0.09970554746385797, "grad_norm": 0.1396484375, "learning_rate": 0.00299884477785168, "loss": 1.2867, "step": 1137 }, { "epoch": 0.09979323923823252, "grad_norm": 0.0966796875, "learning_rate": 0.0029988278541950805, "loss": 1.2831, "step": 1138 }, { "epoch": 0.09988093101260706, "grad_norm": 0.08544921875, "learning_rate": 0.002998810807528863, "loss": 1.3432, "step": 1139 }, { "epoch": 0.09996862278698161, "grad_norm": 0.0830078125, "learning_rate": 0.0029987936378545813, "loss": 1.3169, "step": 1140 }, { "epoch": 0.10005631456135615, "grad_norm": 0.0849609375, "learning_rate": 0.0029987763451738026, "loss": 1.3212, "step": 1141 }, { "epoch": 0.1001440063357307, "grad_norm": 0.087890625, "learning_rate": 0.0029987589294881026, "loss": 1.382, "step": 1142 }, { "epoch": 0.10023169811010524, "grad_norm": 0.111328125, "learning_rate": 0.0029987413907990703, "loss": 1.2966, "step": 1143 }, { "epoch": 0.10031938988447979, "grad_norm": 0.1455078125, "learning_rate": 0.0029987237291083046, "loss": 1.2973, "step": 1144 }, { "epoch": 0.10040708165885433, "grad_norm": 0.1044921875, "learning_rate": 0.002998705944417417, "loss": 1.3291, "step": 1145 }, { "epoch": 0.10049477343322888, "grad_norm": 0.11474609375, "learning_rate": 0.002998688036728028, "loss": 1.3089, "step": 1146 }, { "epoch": 0.10058246520760343, "grad_norm": 0.076171875, "learning_rate": 0.0029986700060417723, "loss": 1.2835, "step": 1147 }, { "epoch": 0.10067015698197797, "grad_norm": 0.11474609375, "learning_rate": 0.0029986518523602936, "loss": 1.3405, "step": 1148 }, { "epoch": 0.10075784875635252, "grad_norm": 0.0966796875, "learning_rate": 0.0029986335756852474, "loss": 1.3233, "step": 1149 }, { "epoch": 0.10084554053072706, "grad_norm": 0.09912109375, "learning_rate": 0.002998615176018301, "loss": 1.282, "step": 1150 }, { "epoch": 0.10093323230510161, "grad_norm": 0.08203125, "learning_rate": 0.0029985966533611313, "loss": 1.3426, "step": 1151 }, { "epoch": 0.10102092407947615, "grad_norm": 0.0859375, "learning_rate": 0.0029985780077154286, "loss": 1.2735, "step": 1152 }, { "epoch": 0.1011086158538507, "grad_norm": 0.146484375, "learning_rate": 0.002998559239082893, "loss": 1.3627, "step": 1153 }, { "epoch": 0.10119630762822523, "grad_norm": 0.162109375, "learning_rate": 0.002998540347465236, "loss": 1.2934, "step": 1154 }, { "epoch": 0.10128399940259979, "grad_norm": 0.12255859375, "learning_rate": 0.0029985213328641803, "loss": 1.3252, "step": 1155 }, { "epoch": 0.10137169117697434, "grad_norm": 0.1728515625, "learning_rate": 0.0029985021952814604, "loss": 1.2824, "step": 1156 }, { "epoch": 0.10145938295134888, "grad_norm": 0.10302734375, "learning_rate": 0.0029984829347188212, "loss": 1.356, "step": 1157 }, { "epoch": 0.10154707472572343, "grad_norm": 0.326171875, "learning_rate": 0.00299846355117802, "loss": 1.3524, "step": 1158 }, { "epoch": 0.10163476650009796, "grad_norm": 0.19140625, "learning_rate": 0.0029984440446608235, "loss": 1.3321, "step": 1159 }, { "epoch": 0.10172245827447252, "grad_norm": 0.2216796875, "learning_rate": 0.0029984244151690116, "loss": 1.3648, "step": 1160 }, { "epoch": 0.10181015004884705, "grad_norm": 0.1787109375, "learning_rate": 0.0029984046627043737, "loss": 1.324, "step": 1161 }, { "epoch": 0.1018978418232216, "grad_norm": 0.1171875, "learning_rate": 0.0029983847872687114, "loss": 1.3589, "step": 1162 }, { "epoch": 0.10198553359759614, "grad_norm": 0.09716796875, "learning_rate": 0.002998364788863837, "loss": 1.3402, "step": 1163 }, { "epoch": 0.1020732253719707, "grad_norm": 0.10205078125, "learning_rate": 0.002998344667491575, "loss": 1.3162, "step": 1164 }, { "epoch": 0.10216091714634525, "grad_norm": 0.126953125, "learning_rate": 0.00299832442315376, "loss": 1.3257, "step": 1165 }, { "epoch": 0.10224860892071978, "grad_norm": 0.09521484375, "learning_rate": 0.0029983040558522384, "loss": 1.4214, "step": 1166 }, { "epoch": 0.10233630069509433, "grad_norm": 0.1357421875, "learning_rate": 0.0029982835655888674, "loss": 1.348, "step": 1167 }, { "epoch": 0.10242399246946887, "grad_norm": 0.10546875, "learning_rate": 0.002998262952365516, "loss": 1.2412, "step": 1168 }, { "epoch": 0.10251168424384342, "grad_norm": 0.10888671875, "learning_rate": 0.0029982422161840636, "loss": 1.3103, "step": 1169 }, { "epoch": 0.10259937601821796, "grad_norm": 0.1103515625, "learning_rate": 0.0029982213570464017, "loss": 1.3279, "step": 1170 }, { "epoch": 0.10268706779259251, "grad_norm": 0.109375, "learning_rate": 0.0029982003749544324, "loss": 1.34, "step": 1171 }, { "epoch": 0.10277475956696705, "grad_norm": 0.0966796875, "learning_rate": 0.0029981792699100692, "loss": 1.3723, "step": 1172 }, { "epoch": 0.1028624513413416, "grad_norm": 0.099609375, "learning_rate": 0.0029981580419152372, "loss": 1.2719, "step": 1173 }, { "epoch": 0.10295014311571615, "grad_norm": 0.06982421875, "learning_rate": 0.0029981366909718715, "loss": 1.2699, "step": 1174 }, { "epoch": 0.10303783489009069, "grad_norm": 0.1015625, "learning_rate": 0.0029981152170819206, "loss": 1.3496, "step": 1175 }, { "epoch": 0.10312552666446524, "grad_norm": 0.1552734375, "learning_rate": 0.0029980936202473416, "loss": 1.3897, "step": 1176 }, { "epoch": 0.10321321843883978, "grad_norm": 0.2099609375, "learning_rate": 0.002998071900470104, "loss": 1.3579, "step": 1177 }, { "epoch": 0.10330091021321433, "grad_norm": 0.1103515625, "learning_rate": 0.00299805005775219, "loss": 1.2813, "step": 1178 }, { "epoch": 0.10338860198758887, "grad_norm": 0.2578125, "learning_rate": 0.00299802809209559, "loss": 1.2954, "step": 1179 }, { "epoch": 0.10347629376196342, "grad_norm": 0.1044921875, "learning_rate": 0.002998006003502308, "loss": 1.4056, "step": 1180 }, { "epoch": 0.10356398553633796, "grad_norm": 0.1572265625, "learning_rate": 0.0029979837919743586, "loss": 1.3866, "step": 1181 }, { "epoch": 0.10365167731071251, "grad_norm": 0.158203125, "learning_rate": 0.0029979614575137673, "loss": 1.3485, "step": 1182 }, { "epoch": 0.10373936908508706, "grad_norm": 0.373046875, "learning_rate": 0.00299793900012257, "loss": 1.3949, "step": 1183 }, { "epoch": 0.1038270608594616, "grad_norm": 0.37109375, "learning_rate": 0.002997916419802817, "loss": 1.391, "step": 1184 }, { "epoch": 0.10391475263383615, "grad_norm": 0.158203125, "learning_rate": 0.0029978937165565656, "loss": 1.3314, "step": 1185 }, { "epoch": 0.10400244440821069, "grad_norm": 0.10888671875, "learning_rate": 0.002997870890385886, "loss": 1.3823, "step": 1186 }, { "epoch": 0.10409013618258524, "grad_norm": 0.1181640625, "learning_rate": 0.002997847941292861, "loss": 1.391, "step": 1187 }, { "epoch": 0.10417782795695978, "grad_norm": 0.12353515625, "learning_rate": 0.0029978248692795837, "loss": 1.2936, "step": 1188 }, { "epoch": 0.10426551973133433, "grad_norm": 0.12060546875, "learning_rate": 0.0029978016743481576, "loss": 1.2837, "step": 1189 }, { "epoch": 0.10435321150570886, "grad_norm": 0.08837890625, "learning_rate": 0.0029977783565006983, "loss": 1.2655, "step": 1190 }, { "epoch": 0.10444090328008342, "grad_norm": 0.111328125, "learning_rate": 0.0029977549157393316, "loss": 1.2235, "step": 1191 }, { "epoch": 0.10452859505445797, "grad_norm": 0.09521484375, "learning_rate": 0.0029977313520661956, "loss": 1.3755, "step": 1192 }, { "epoch": 0.1046162868288325, "grad_norm": 0.08837890625, "learning_rate": 0.0029977076654834406, "loss": 1.3078, "step": 1193 }, { "epoch": 0.10470397860320706, "grad_norm": 0.07763671875, "learning_rate": 0.0029976838559932248, "loss": 1.3872, "step": 1194 }, { "epoch": 0.1047916703775816, "grad_norm": 0.07958984375, "learning_rate": 0.0029976599235977206, "loss": 1.3248, "step": 1195 }, { "epoch": 0.10487936215195615, "grad_norm": 0.08251953125, "learning_rate": 0.0029976358682991104, "loss": 1.2766, "step": 1196 }, { "epoch": 0.10496705392633068, "grad_norm": 0.09130859375, "learning_rate": 0.002997611690099587, "loss": 1.3499, "step": 1197 }, { "epoch": 0.10505474570070523, "grad_norm": 0.1005859375, "learning_rate": 0.0029975873890013575, "loss": 1.3031, "step": 1198 }, { "epoch": 0.10514243747507977, "grad_norm": 0.158203125, "learning_rate": 0.0029975629650066367, "loss": 1.3624, "step": 1199 }, { "epoch": 0.10523012924945432, "grad_norm": 0.14453125, "learning_rate": 0.0029975384181176513, "loss": 1.3101, "step": 1200 }, { "epoch": 0.10531782102382888, "grad_norm": 0.10888671875, "learning_rate": 0.0029975137483366416, "loss": 1.2886, "step": 1201 }, { "epoch": 0.10540551279820341, "grad_norm": 0.232421875, "learning_rate": 0.0029974889556658568, "loss": 1.2838, "step": 1202 }, { "epoch": 0.10549320457257796, "grad_norm": 0.12353515625, "learning_rate": 0.0029974640401075575, "loss": 1.3332, "step": 1203 }, { "epoch": 0.1055808963469525, "grad_norm": 0.1171875, "learning_rate": 0.002997439001664016, "loss": 1.3389, "step": 1204 }, { "epoch": 0.10566858812132705, "grad_norm": 0.1728515625, "learning_rate": 0.002997413840337516, "loss": 1.309, "step": 1205 }, { "epoch": 0.10575627989570159, "grad_norm": 0.12060546875, "learning_rate": 0.0029973885561303524, "loss": 1.2586, "step": 1206 }, { "epoch": 0.10584397167007614, "grad_norm": 0.10302734375, "learning_rate": 0.0029973631490448306, "loss": 1.3604, "step": 1207 }, { "epoch": 0.10593166344445068, "grad_norm": 0.1259765625, "learning_rate": 0.0029973376190832674, "loss": 1.3604, "step": 1208 }, { "epoch": 0.10601935521882523, "grad_norm": 0.08740234375, "learning_rate": 0.002997311966247992, "loss": 1.3153, "step": 1209 }, { "epoch": 0.10610704699319978, "grad_norm": 0.1279296875, "learning_rate": 0.0029972861905413436, "loss": 1.3669, "step": 1210 }, { "epoch": 0.10619473876757432, "grad_norm": 0.123046875, "learning_rate": 0.002997260291965672, "loss": 1.2343, "step": 1211 }, { "epoch": 0.10628243054194887, "grad_norm": 0.11279296875, "learning_rate": 0.0029972342705233404, "loss": 1.2207, "step": 1212 }, { "epoch": 0.10637012231632341, "grad_norm": 0.11181640625, "learning_rate": 0.0029972081262167197, "loss": 1.3529, "step": 1213 }, { "epoch": 0.10645781409069796, "grad_norm": 0.08203125, "learning_rate": 0.0029971818590481974, "loss": 1.3519, "step": 1214 }, { "epoch": 0.1065455058650725, "grad_norm": 0.142578125, "learning_rate": 0.0029971554690201665, "loss": 1.305, "step": 1215 }, { "epoch": 0.10663319763944705, "grad_norm": 0.146484375, "learning_rate": 0.0029971289561350344, "loss": 1.3518, "step": 1216 }, { "epoch": 0.10672088941382159, "grad_norm": 0.083984375, "learning_rate": 0.0029971023203952192, "loss": 1.3848, "step": 1217 }, { "epoch": 0.10680858118819614, "grad_norm": 0.138671875, "learning_rate": 0.00299707556180315, "loss": 1.3416, "step": 1218 }, { "epoch": 0.10689627296257069, "grad_norm": 0.1162109375, "learning_rate": 0.0029970486803612673, "loss": 1.3402, "step": 1219 }, { "epoch": 0.10698396473694523, "grad_norm": 0.07568359375, "learning_rate": 0.002997021676072022, "loss": 1.3151, "step": 1220 }, { "epoch": 0.10707165651131978, "grad_norm": 0.109375, "learning_rate": 0.002996994548937877, "loss": 1.3481, "step": 1221 }, { "epoch": 0.10715934828569432, "grad_norm": 0.1162109375, "learning_rate": 0.002996967298961307, "loss": 1.2862, "step": 1222 }, { "epoch": 0.10724704006006887, "grad_norm": 0.0830078125, "learning_rate": 0.0029969399261447955, "loss": 1.2572, "step": 1223 }, { "epoch": 0.1073347318344434, "grad_norm": 0.1005859375, "learning_rate": 0.002996912430490841, "loss": 1.284, "step": 1224 }, { "epoch": 0.10742242360881796, "grad_norm": 0.12451171875, "learning_rate": 0.002996884812001949, "loss": 1.3279, "step": 1225 }, { "epoch": 0.1075101153831925, "grad_norm": 0.203125, "learning_rate": 0.002996857070680639, "loss": 1.3086, "step": 1226 }, { "epoch": 0.10759780715756705, "grad_norm": 0.10400390625, "learning_rate": 0.002996829206529442, "loss": 1.3585, "step": 1227 }, { "epoch": 0.1076854989319416, "grad_norm": 0.16015625, "learning_rate": 0.0029968012195508978, "loss": 1.3435, "step": 1228 }, { "epoch": 0.10777319070631614, "grad_norm": 0.1884765625, "learning_rate": 0.0029967731097475586, "loss": 1.3828, "step": 1229 }, { "epoch": 0.10786088248069069, "grad_norm": 0.2314453125, "learning_rate": 0.002996744877121989, "loss": 1.3482, "step": 1230 }, { "epoch": 0.10794857425506522, "grad_norm": 0.11572265625, "learning_rate": 0.0029967165216767634, "loss": 1.3508, "step": 1231 }, { "epoch": 0.10803626602943978, "grad_norm": 0.2236328125, "learning_rate": 0.0029966880434144673, "loss": 1.2748, "step": 1232 }, { "epoch": 0.10812395780381431, "grad_norm": 0.158203125, "learning_rate": 0.0029966594423376973, "loss": 1.2977, "step": 1233 }, { "epoch": 0.10821164957818886, "grad_norm": 0.142578125, "learning_rate": 0.002996630718449064, "loss": 1.3324, "step": 1234 }, { "epoch": 0.1082993413525634, "grad_norm": 0.1728515625, "learning_rate": 0.0029966018717511845, "loss": 1.2831, "step": 1235 }, { "epoch": 0.10838703312693795, "grad_norm": 0.06884765625, "learning_rate": 0.002996572902246691, "loss": 1.2907, "step": 1236 }, { "epoch": 0.1084747249013125, "grad_norm": 0.09814453125, "learning_rate": 0.0029965438099382245, "loss": 1.3001, "step": 1237 }, { "epoch": 0.10856241667568704, "grad_norm": 0.0673828125, "learning_rate": 0.0029965145948284387, "loss": 1.3221, "step": 1238 }, { "epoch": 0.1086501084500616, "grad_norm": 0.11083984375, "learning_rate": 0.0029964852569199984, "loss": 1.2634, "step": 1239 }, { "epoch": 0.10873780022443613, "grad_norm": 0.072265625, "learning_rate": 0.002996455796215578, "loss": 1.294, "step": 1240 }, { "epoch": 0.10882549199881068, "grad_norm": 0.09765625, "learning_rate": 0.0029964262127178654, "loss": 1.3118, "step": 1241 }, { "epoch": 0.10891318377318522, "grad_norm": 0.10009765625, "learning_rate": 0.0029963965064295573, "loss": 1.27, "step": 1242 }, { "epoch": 0.10900087554755977, "grad_norm": 0.0830078125, "learning_rate": 0.002996366677353364, "loss": 1.3144, "step": 1243 }, { "epoch": 0.10908856732193431, "grad_norm": 0.0849609375, "learning_rate": 0.0029963367254920055, "loss": 1.339, "step": 1244 }, { "epoch": 0.10917625909630886, "grad_norm": 0.11376953125, "learning_rate": 0.002996306650848213, "loss": 1.3353, "step": 1245 }, { "epoch": 0.10926395087068341, "grad_norm": 0.1611328125, "learning_rate": 0.0029962764534247287, "loss": 1.2875, "step": 1246 }, { "epoch": 0.10935164264505795, "grad_norm": 0.076171875, "learning_rate": 0.0029962461332243085, "loss": 1.3335, "step": 1247 }, { "epoch": 0.1094393344194325, "grad_norm": 0.1513671875, "learning_rate": 0.0029962156902497154, "loss": 1.2532, "step": 1248 }, { "epoch": 0.10952702619380704, "grad_norm": 0.091796875, "learning_rate": 0.002996185124503727, "loss": 1.2625, "step": 1249 }, { "epoch": 0.10961471796818159, "grad_norm": 0.232421875, "learning_rate": 0.0029961544359891302, "loss": 1.3298, "step": 1250 }, { "epoch": 0.10970240974255613, "grad_norm": 0.0712890625, "learning_rate": 0.0029961236247087234, "loss": 1.2993, "step": 1251 }, { "epoch": 0.10979010151693068, "grad_norm": 0.193359375, "learning_rate": 0.0029960926906653185, "loss": 1.3254, "step": 1252 }, { "epoch": 0.10987779329130522, "grad_norm": 0.1162109375, "learning_rate": 0.0029960616338617335, "loss": 1.3134, "step": 1253 }, { "epoch": 0.10996548506567977, "grad_norm": 0.0927734375, "learning_rate": 0.0029960304543008034, "loss": 1.2123, "step": 1254 }, { "epoch": 0.11005317684005432, "grad_norm": 0.0654296875, "learning_rate": 0.00299599915198537, "loss": 1.3634, "step": 1255 }, { "epoch": 0.11014086861442886, "grad_norm": 0.09521484375, "learning_rate": 0.002995967726918289, "loss": 1.3638, "step": 1256 }, { "epoch": 0.11022856038880341, "grad_norm": 0.10595703125, "learning_rate": 0.002995936179102426, "loss": 1.2941, "step": 1257 }, { "epoch": 0.11031625216317795, "grad_norm": 0.1533203125, "learning_rate": 0.0029959045085406573, "loss": 1.3817, "step": 1258 }, { "epoch": 0.1104039439375525, "grad_norm": 0.130859375, "learning_rate": 0.002995872715235873, "loss": 1.241, "step": 1259 }, { "epoch": 0.11049163571192704, "grad_norm": 0.07080078125, "learning_rate": 0.0029958407991909704, "loss": 1.3193, "step": 1260 }, { "epoch": 0.11057932748630159, "grad_norm": 0.10400390625, "learning_rate": 0.0029958087604088617, "loss": 1.3237, "step": 1261 }, { "epoch": 0.11066701926067612, "grad_norm": 0.08349609375, "learning_rate": 0.002995776598892468, "loss": 1.303, "step": 1262 }, { "epoch": 0.11075471103505068, "grad_norm": 0.07958984375, "learning_rate": 0.0029957443146447224, "loss": 1.3359, "step": 1263 }, { "epoch": 0.11084240280942523, "grad_norm": 0.125, "learning_rate": 0.0029957119076685695, "loss": 1.292, "step": 1264 }, { "epoch": 0.11093009458379977, "grad_norm": 0.0693359375, "learning_rate": 0.002995679377966965, "loss": 1.3467, "step": 1265 }, { "epoch": 0.11101778635817432, "grad_norm": 0.1328125, "learning_rate": 0.0029956467255428743, "loss": 1.2914, "step": 1266 }, { "epoch": 0.11110547813254885, "grad_norm": 0.1416015625, "learning_rate": 0.0029956139503992765, "loss": 1.3085, "step": 1267 }, { "epoch": 0.1111931699069234, "grad_norm": 0.201171875, "learning_rate": 0.0029955810525391603, "loss": 1.2696, "step": 1268 }, { "epoch": 0.11128086168129794, "grad_norm": 0.1015625, "learning_rate": 0.002995548031965526, "loss": 1.3236, "step": 1269 }, { "epoch": 0.1113685534556725, "grad_norm": 0.2138671875, "learning_rate": 0.0029955148886813836, "loss": 1.3616, "step": 1270 }, { "epoch": 0.11145624523004703, "grad_norm": 0.10791015625, "learning_rate": 0.0029954816226897573, "loss": 1.3317, "step": 1271 }, { "epoch": 0.11154393700442158, "grad_norm": 0.1162109375, "learning_rate": 0.0029954482339936803, "loss": 1.3282, "step": 1272 }, { "epoch": 0.11163162877879614, "grad_norm": 0.0859375, "learning_rate": 0.002995414722596198, "loss": 1.3022, "step": 1273 }, { "epoch": 0.11171932055317067, "grad_norm": 0.08544921875, "learning_rate": 0.0029953810885003655, "loss": 1.3222, "step": 1274 }, { "epoch": 0.11180701232754522, "grad_norm": 0.07568359375, "learning_rate": 0.0029953473317092514, "loss": 1.3184, "step": 1275 }, { "epoch": 0.11189470410191976, "grad_norm": 0.10205078125, "learning_rate": 0.0029953134522259337, "loss": 1.2974, "step": 1276 }, { "epoch": 0.11198239587629431, "grad_norm": 0.1123046875, "learning_rate": 0.0029952794500535014, "loss": 1.3494, "step": 1277 }, { "epoch": 0.11207008765066885, "grad_norm": 0.107421875, "learning_rate": 0.0029952453251950563, "loss": 1.2715, "step": 1278 }, { "epoch": 0.1121577794250434, "grad_norm": 0.10302734375, "learning_rate": 0.0029952110776537105, "loss": 1.2665, "step": 1279 }, { "epoch": 0.11224547119941794, "grad_norm": 0.119140625, "learning_rate": 0.002995176707432587, "loss": 1.3398, "step": 1280 }, { "epoch": 0.11233316297379249, "grad_norm": 0.1103515625, "learning_rate": 0.0029951422145348206, "loss": 1.2775, "step": 1281 }, { "epoch": 0.11242085474816704, "grad_norm": 0.059814453125, "learning_rate": 0.0029951075989635566, "loss": 1.2788, "step": 1282 }, { "epoch": 0.11250854652254158, "grad_norm": 0.0908203125, "learning_rate": 0.0029950728607219513, "loss": 1.3765, "step": 1283 }, { "epoch": 0.11259623829691613, "grad_norm": 0.11962890625, "learning_rate": 0.0029950379998131744, "loss": 1.2966, "step": 1284 }, { "epoch": 0.11268393007129067, "grad_norm": 0.12158203125, "learning_rate": 0.0029950030162404035, "loss": 1.2517, "step": 1285 }, { "epoch": 0.11277162184566522, "grad_norm": 0.1455078125, "learning_rate": 0.0029949679100068297, "loss": 1.3251, "step": 1286 }, { "epoch": 0.11285931362003976, "grad_norm": 0.11865234375, "learning_rate": 0.002994932681115655, "loss": 1.3349, "step": 1287 }, { "epoch": 0.11294700539441431, "grad_norm": 0.1669921875, "learning_rate": 0.0029948973295700907, "loss": 1.3208, "step": 1288 }, { "epoch": 0.11303469716878885, "grad_norm": 0.126953125, "learning_rate": 0.002994861855373362, "loss": 1.4167, "step": 1289 }, { "epoch": 0.1131223889431634, "grad_norm": 0.09912109375, "learning_rate": 0.002994826258528705, "loss": 1.2385, "step": 1290 }, { "epoch": 0.11321008071753795, "grad_norm": 0.1103515625, "learning_rate": 0.0029947905390393637, "loss": 1.2785, "step": 1291 }, { "epoch": 0.11329777249191249, "grad_norm": 0.267578125, "learning_rate": 0.002994754696908597, "loss": 1.3461, "step": 1292 }, { "epoch": 0.11338546426628704, "grad_norm": 0.24609375, "learning_rate": 0.0029947187321396735, "loss": 1.3384, "step": 1293 }, { "epoch": 0.11347315604066158, "grad_norm": 0.0732421875, "learning_rate": 0.002994682644735873, "loss": 1.3497, "step": 1294 }, { "epoch": 0.11356084781503613, "grad_norm": 0.193359375, "learning_rate": 0.0029946464347004867, "loss": 1.3835, "step": 1295 }, { "epoch": 0.11364853958941067, "grad_norm": 0.09765625, "learning_rate": 0.002994610102036817, "loss": 1.3218, "step": 1296 }, { "epoch": 0.11373623136378522, "grad_norm": 0.12353515625, "learning_rate": 0.0029945736467481767, "loss": 1.2858, "step": 1297 }, { "epoch": 0.11382392313815975, "grad_norm": 0.0859375, "learning_rate": 0.002994537068837891, "loss": 1.2978, "step": 1298 }, { "epoch": 0.1139116149125343, "grad_norm": 0.09814453125, "learning_rate": 0.002994500368309295, "loss": 1.3046, "step": 1299 }, { "epoch": 0.11399930668690886, "grad_norm": 0.10400390625, "learning_rate": 0.0029944635451657365, "loss": 1.2858, "step": 1300 }, { "epoch": 0.1140869984612834, "grad_norm": 0.10498046875, "learning_rate": 0.002994426599410574, "loss": 1.3892, "step": 1301 }, { "epoch": 0.11417469023565795, "grad_norm": 0.1220703125, "learning_rate": 0.0029943895310471755, "loss": 1.3058, "step": 1302 }, { "epoch": 0.11426238201003248, "grad_norm": 0.115234375, "learning_rate": 0.0029943523400789225, "loss": 1.3764, "step": 1303 }, { "epoch": 0.11435007378440704, "grad_norm": 0.09326171875, "learning_rate": 0.0029943150265092067, "loss": 1.3253, "step": 1304 }, { "epoch": 0.11443776555878157, "grad_norm": 0.11181640625, "learning_rate": 0.0029942775903414307, "loss": 1.3296, "step": 1305 }, { "epoch": 0.11452545733315612, "grad_norm": 0.0888671875, "learning_rate": 0.0029942400315790085, "loss": 1.3896, "step": 1306 }, { "epoch": 0.11461314910753066, "grad_norm": 0.0654296875, "learning_rate": 0.0029942023502253657, "loss": 1.3069, "step": 1307 }, { "epoch": 0.11470084088190521, "grad_norm": 0.087890625, "learning_rate": 0.0029941645462839388, "loss": 1.2887, "step": 1308 }, { "epoch": 0.11478853265627977, "grad_norm": 0.0693359375, "learning_rate": 0.0029941266197581755, "loss": 1.3141, "step": 1309 }, { "epoch": 0.1148762244306543, "grad_norm": 0.0927734375, "learning_rate": 0.0029940885706515336, "loss": 1.3429, "step": 1310 }, { "epoch": 0.11496391620502885, "grad_norm": 0.0703125, "learning_rate": 0.002994050398967484, "loss": 1.3575, "step": 1311 }, { "epoch": 0.11505160797940339, "grad_norm": 0.11669921875, "learning_rate": 0.002994012104709508, "loss": 1.2967, "step": 1312 }, { "epoch": 0.11513929975377794, "grad_norm": 0.111328125, "learning_rate": 0.002993973687881097, "loss": 1.4125, "step": 1313 }, { "epoch": 0.11522699152815248, "grad_norm": 0.08056640625, "learning_rate": 0.0029939351484857555, "loss": 1.2555, "step": 1314 }, { "epoch": 0.11531468330252703, "grad_norm": 0.08447265625, "learning_rate": 0.0029938964865269977, "loss": 1.2359, "step": 1315 }, { "epoch": 0.11540237507690157, "grad_norm": 0.0810546875, "learning_rate": 0.0029938577020083503, "loss": 1.3448, "step": 1316 }, { "epoch": 0.11549006685127612, "grad_norm": 0.0703125, "learning_rate": 0.0029938187949333484, "loss": 1.2827, "step": 1317 }, { "epoch": 0.11557775862565067, "grad_norm": 0.07470703125, "learning_rate": 0.0029937797653055423, "loss": 1.3329, "step": 1318 }, { "epoch": 0.11566545040002521, "grad_norm": 0.083984375, "learning_rate": 0.00299374061312849, "loss": 1.2784, "step": 1319 }, { "epoch": 0.11575314217439976, "grad_norm": 0.12255859375, "learning_rate": 0.002993701338405763, "loss": 1.3145, "step": 1320 }, { "epoch": 0.1158408339487743, "grad_norm": 0.09765625, "learning_rate": 0.002993661941140943, "loss": 1.3254, "step": 1321 }, { "epoch": 0.11592852572314885, "grad_norm": 0.06298828125, "learning_rate": 0.002993622421337622, "loss": 1.3961, "step": 1322 }, { "epoch": 0.11601621749752339, "grad_norm": 0.08837890625, "learning_rate": 0.0029935827789994048, "loss": 1.3328, "step": 1323 }, { "epoch": 0.11610390927189794, "grad_norm": 0.1015625, "learning_rate": 0.002993543014129907, "loss": 1.2933, "step": 1324 }, { "epoch": 0.11619160104627248, "grad_norm": 0.2392578125, "learning_rate": 0.0029935031267327543, "loss": 1.3627, "step": 1325 }, { "epoch": 0.11627929282064703, "grad_norm": 0.205078125, "learning_rate": 0.0029934631168115847, "loss": 1.272, "step": 1326 }, { "epoch": 0.11636698459502158, "grad_norm": 0.09912109375, "learning_rate": 0.002993422984370047, "loss": 1.288, "step": 1327 }, { "epoch": 0.11645467636939612, "grad_norm": 0.15625, "learning_rate": 0.002993382729411801, "loss": 1.3332, "step": 1328 }, { "epoch": 0.11654236814377067, "grad_norm": 0.25, "learning_rate": 0.0029933423519405184, "loss": 1.3108, "step": 1329 }, { "epoch": 0.1166300599181452, "grad_norm": 0.1064453125, "learning_rate": 0.002993301851959881, "loss": 1.2691, "step": 1330 }, { "epoch": 0.11671775169251976, "grad_norm": 0.158203125, "learning_rate": 0.002993261229473582, "loss": 1.3041, "step": 1331 }, { "epoch": 0.1168054434668943, "grad_norm": 0.1279296875, "learning_rate": 0.0029932204844853273, "loss": 1.3623, "step": 1332 }, { "epoch": 0.11689313524126885, "grad_norm": 0.06640625, "learning_rate": 0.0029931796169988313, "loss": 1.29, "step": 1333 }, { "epoch": 0.11698082701564338, "grad_norm": 0.08837890625, "learning_rate": 0.002993138627017822, "loss": 1.3212, "step": 1334 }, { "epoch": 0.11706851879001794, "grad_norm": 0.0849609375, "learning_rate": 0.002993097514546037, "loss": 1.3031, "step": 1335 }, { "epoch": 0.11715621056439249, "grad_norm": 0.07666015625, "learning_rate": 0.0029930562795872258, "loss": 1.2398, "step": 1336 }, { "epoch": 0.11724390233876703, "grad_norm": 0.0703125, "learning_rate": 0.002993014922145149, "loss": 1.3357, "step": 1337 }, { "epoch": 0.11733159411314158, "grad_norm": 0.11181640625, "learning_rate": 0.002992973442223578, "loss": 1.3013, "step": 1338 }, { "epoch": 0.11741928588751611, "grad_norm": 0.095703125, "learning_rate": 0.0029929318398262965, "loss": 1.3366, "step": 1339 }, { "epoch": 0.11750697766189067, "grad_norm": 0.146484375, "learning_rate": 0.002992890114957098, "loss": 1.3721, "step": 1340 }, { "epoch": 0.1175946694362652, "grad_norm": 0.18359375, "learning_rate": 0.0029928482676197872, "loss": 1.2499, "step": 1341 }, { "epoch": 0.11768236121063975, "grad_norm": 0.1611328125, "learning_rate": 0.002992806297818181, "loss": 1.3645, "step": 1342 }, { "epoch": 0.11777005298501429, "grad_norm": 0.1142578125, "learning_rate": 0.002992764205556107, "loss": 1.2865, "step": 1343 }, { "epoch": 0.11785774475938884, "grad_norm": 0.1025390625, "learning_rate": 0.0029927219908374037, "loss": 1.3024, "step": 1344 }, { "epoch": 0.1179454365337634, "grad_norm": 0.1904296875, "learning_rate": 0.002992679653665921, "loss": 1.3191, "step": 1345 }, { "epoch": 0.11803312830813793, "grad_norm": 0.0693359375, "learning_rate": 0.0029926371940455204, "loss": 1.3488, "step": 1346 }, { "epoch": 0.11812082008251248, "grad_norm": 0.2890625, "learning_rate": 0.0029925946119800737, "loss": 1.3971, "step": 1347 }, { "epoch": 0.11820851185688702, "grad_norm": 0.251953125, "learning_rate": 0.0029925519074734635, "loss": 1.3229, "step": 1348 }, { "epoch": 0.11829620363126157, "grad_norm": 0.10693359375, "learning_rate": 0.002992509080529586, "loss": 1.3529, "step": 1349 }, { "epoch": 0.11838389540563611, "grad_norm": 0.265625, "learning_rate": 0.002992466131152345, "loss": 1.4095, "step": 1350 }, { "epoch": 0.11847158718001066, "grad_norm": 0.1904296875, "learning_rate": 0.0029924230593456596, "loss": 1.2716, "step": 1351 }, { "epoch": 0.1185592789543852, "grad_norm": 0.123046875, "learning_rate": 0.002992379865113456, "loss": 1.2908, "step": 1352 }, { "epoch": 0.11864697072875975, "grad_norm": 0.208984375, "learning_rate": 0.0029923365484596743, "loss": 1.2965, "step": 1353 }, { "epoch": 0.11873466250313429, "grad_norm": 0.11572265625, "learning_rate": 0.0029922931093882645, "loss": 1.3475, "step": 1354 }, { "epoch": 0.11882235427750884, "grad_norm": 0.29296875, "learning_rate": 0.002992249547903188, "loss": 1.2868, "step": 1355 }, { "epoch": 0.11891004605188339, "grad_norm": 0.1455078125, "learning_rate": 0.002992205864008418, "loss": 1.3291, "step": 1356 }, { "epoch": 0.11899773782625793, "grad_norm": 0.1552734375, "learning_rate": 0.002992162057707938, "loss": 1.3299, "step": 1357 }, { "epoch": 0.11908542960063248, "grad_norm": 0.146484375, "learning_rate": 0.0029921181290057425, "loss": 1.3613, "step": 1358 }, { "epoch": 0.11917312137500702, "grad_norm": 0.0908203125, "learning_rate": 0.0029920740779058393, "loss": 1.3172, "step": 1359 }, { "epoch": 0.11926081314938157, "grad_norm": 0.1064453125, "learning_rate": 0.0029920299044122445, "loss": 1.3173, "step": 1360 }, { "epoch": 0.11934850492375611, "grad_norm": 0.09228515625, "learning_rate": 0.0029919856085289863, "loss": 1.3222, "step": 1361 }, { "epoch": 0.11943619669813066, "grad_norm": 0.1689453125, "learning_rate": 0.0029919411902601055, "loss": 1.3778, "step": 1362 }, { "epoch": 0.1195238884725052, "grad_norm": 0.12158203125, "learning_rate": 0.002991896649609652, "loss": 1.3441, "step": 1363 }, { "epoch": 0.11961158024687975, "grad_norm": 0.1279296875, "learning_rate": 0.0029918519865816885, "loss": 1.3452, "step": 1364 }, { "epoch": 0.1196992720212543, "grad_norm": 0.1787109375, "learning_rate": 0.0029918072011802872, "loss": 1.2676, "step": 1365 }, { "epoch": 0.11978696379562884, "grad_norm": 0.103515625, "learning_rate": 0.002991762293409534, "loss": 1.2948, "step": 1366 }, { "epoch": 0.11987465557000339, "grad_norm": 0.150390625, "learning_rate": 0.002991717263273523, "loss": 1.2419, "step": 1367 }, { "epoch": 0.11996234734437793, "grad_norm": 0.10107421875, "learning_rate": 0.0029916721107763606, "loss": 1.2408, "step": 1368 }, { "epoch": 0.12005003911875248, "grad_norm": 0.08544921875, "learning_rate": 0.0029916268359221655, "loss": 1.3819, "step": 1369 }, { "epoch": 0.12013773089312701, "grad_norm": 0.11865234375, "learning_rate": 0.002991581438715066, "loss": 1.2392, "step": 1370 }, { "epoch": 0.12022542266750157, "grad_norm": 0.0927734375, "learning_rate": 0.0029915359191592036, "loss": 1.2534, "step": 1371 }, { "epoch": 0.1203131144418761, "grad_norm": 0.087890625, "learning_rate": 0.002991490277258728, "loss": 1.2898, "step": 1372 }, { "epoch": 0.12040080621625066, "grad_norm": 0.1279296875, "learning_rate": 0.0029914445130178016, "loss": 1.2815, "step": 1373 }, { "epoch": 0.1204884979906252, "grad_norm": 0.09716796875, "learning_rate": 0.002991398626440599, "loss": 1.3045, "step": 1374 }, { "epoch": 0.12057618976499974, "grad_norm": 0.1435546875, "learning_rate": 0.0029913526175313052, "loss": 1.2907, "step": 1375 }, { "epoch": 0.1206638815393743, "grad_norm": 0.09228515625, "learning_rate": 0.0029913064862941144, "loss": 1.3564, "step": 1376 }, { "epoch": 0.12075157331374883, "grad_norm": 0.076171875, "learning_rate": 0.002991260232733235, "loss": 1.2927, "step": 1377 }, { "epoch": 0.12083926508812338, "grad_norm": 0.1044921875, "learning_rate": 0.0029912138568528846, "loss": 1.3473, "step": 1378 }, { "epoch": 0.12092695686249792, "grad_norm": 0.07666015625, "learning_rate": 0.002991167358657293, "loss": 1.3737, "step": 1379 }, { "epoch": 0.12101464863687247, "grad_norm": 0.07958984375, "learning_rate": 0.0029911207381507, "loss": 1.2672, "step": 1380 }, { "epoch": 0.12110234041124701, "grad_norm": 0.08349609375, "learning_rate": 0.0029910739953373584, "loss": 1.267, "step": 1381 }, { "epoch": 0.12119003218562156, "grad_norm": 0.09228515625, "learning_rate": 0.0029910271302215304, "loss": 1.2971, "step": 1382 }, { "epoch": 0.12127772395999611, "grad_norm": 0.10009765625, "learning_rate": 0.0029909801428074896, "loss": 1.3148, "step": 1383 }, { "epoch": 0.12136541573437065, "grad_norm": 0.09033203125, "learning_rate": 0.0029909330330995213, "loss": 1.3299, "step": 1384 }, { "epoch": 0.1214531075087452, "grad_norm": 0.06591796875, "learning_rate": 0.0029908858011019226, "loss": 1.2774, "step": 1385 }, { "epoch": 0.12154079928311974, "grad_norm": 0.1474609375, "learning_rate": 0.002990838446819, "loss": 1.3522, "step": 1386 }, { "epoch": 0.12162849105749429, "grad_norm": 0.2333984375, "learning_rate": 0.002990790970255072, "loss": 1.2872, "step": 1387 }, { "epoch": 0.12171618283186883, "grad_norm": 0.09765625, "learning_rate": 0.0029907433714144696, "loss": 1.2608, "step": 1388 }, { "epoch": 0.12180387460624338, "grad_norm": 0.2373046875, "learning_rate": 0.0029906956503015325, "loss": 1.2954, "step": 1389 }, { "epoch": 0.12189156638061792, "grad_norm": 0.1181640625, "learning_rate": 0.002990647806920613, "loss": 1.3108, "step": 1390 }, { "epoch": 0.12197925815499247, "grad_norm": 0.228515625, "learning_rate": 0.0029905998412760744, "loss": 1.3155, "step": 1391 }, { "epoch": 0.12206694992936702, "grad_norm": 0.15625, "learning_rate": 0.002990551753372291, "loss": 1.274, "step": 1392 }, { "epoch": 0.12215464170374156, "grad_norm": 0.1611328125, "learning_rate": 0.0029905035432136484, "loss": 1.2705, "step": 1393 }, { "epoch": 0.12224233347811611, "grad_norm": 0.1728515625, "learning_rate": 0.0029904552108045426, "loss": 1.3025, "step": 1394 }, { "epoch": 0.12233002525249065, "grad_norm": 0.1669921875, "learning_rate": 0.0029904067561493824, "loss": 1.3554, "step": 1395 }, { "epoch": 0.1224177170268652, "grad_norm": 0.1650390625, "learning_rate": 0.0029903581792525866, "loss": 1.2629, "step": 1396 }, { "epoch": 0.12250540880123974, "grad_norm": 0.12890625, "learning_rate": 0.0029903094801185847, "loss": 1.3087, "step": 1397 }, { "epoch": 0.12259310057561429, "grad_norm": 0.162109375, "learning_rate": 0.002990260658751818, "loss": 1.2803, "step": 1398 }, { "epoch": 0.12268079234998883, "grad_norm": 0.1494140625, "learning_rate": 0.0029902117151567394, "loss": 1.3107, "step": 1399 }, { "epoch": 0.12276848412436338, "grad_norm": 0.162109375, "learning_rate": 0.002990162649337812, "loss": 1.3432, "step": 1400 }, { "epoch": 0.12285617589873793, "grad_norm": 0.09228515625, "learning_rate": 0.00299011346129951, "loss": 1.3475, "step": 1401 }, { "epoch": 0.12294386767311247, "grad_norm": 0.10107421875, "learning_rate": 0.00299006415104632, "loss": 1.3691, "step": 1402 }, { "epoch": 0.12303155944748702, "grad_norm": 0.08203125, "learning_rate": 0.00299001471858274, "loss": 1.3435, "step": 1403 }, { "epoch": 0.12311925122186156, "grad_norm": 0.126953125, "learning_rate": 0.002989965163913276, "loss": 1.2595, "step": 1404 }, { "epoch": 0.12320694299623611, "grad_norm": 0.07666015625, "learning_rate": 0.002989915487042448, "loss": 1.2682, "step": 1405 }, { "epoch": 0.12329463477061064, "grad_norm": 0.1298828125, "learning_rate": 0.002989865687974787, "loss": 1.3321, "step": 1406 }, { "epoch": 0.1233823265449852, "grad_norm": 0.07861328125, "learning_rate": 0.0029898157667148334, "loss": 1.315, "step": 1407 }, { "epoch": 0.12347001831935973, "grad_norm": 0.130859375, "learning_rate": 0.0029897657232671408, "loss": 1.3025, "step": 1408 }, { "epoch": 0.12355771009373429, "grad_norm": 0.12060546875, "learning_rate": 0.002989715557636273, "loss": 1.3442, "step": 1409 }, { "epoch": 0.12364540186810884, "grad_norm": 0.10498046875, "learning_rate": 0.0029896652698268057, "loss": 1.3382, "step": 1410 }, { "epoch": 0.12373309364248337, "grad_norm": 0.13671875, "learning_rate": 0.0029896148598433227, "loss": 1.2784, "step": 1411 }, { "epoch": 0.12382078541685793, "grad_norm": 0.1875, "learning_rate": 0.0029895643276904235, "loss": 1.2851, "step": 1412 }, { "epoch": 0.12390847719123246, "grad_norm": 0.09814453125, "learning_rate": 0.0029895136733727157, "loss": 1.2747, "step": 1413 }, { "epoch": 0.12399616896560701, "grad_norm": 0.173828125, "learning_rate": 0.0029894628968948184, "loss": 1.3125, "step": 1414 }, { "epoch": 0.12408386073998155, "grad_norm": 0.07080078125, "learning_rate": 0.002989411998261363, "loss": 1.3735, "step": 1415 }, { "epoch": 0.1241715525143561, "grad_norm": 0.095703125, "learning_rate": 0.0029893609774769908, "loss": 1.3115, "step": 1416 }, { "epoch": 0.12425924428873064, "grad_norm": 0.1025390625, "learning_rate": 0.0029893098345463547, "loss": 1.3742, "step": 1417 }, { "epoch": 0.12434693606310519, "grad_norm": 0.1142578125, "learning_rate": 0.0029892585694741196, "loss": 1.3222, "step": 1418 }, { "epoch": 0.12443462783747974, "grad_norm": 0.09619140625, "learning_rate": 0.00298920718226496, "loss": 1.3342, "step": 1419 }, { "epoch": 0.12452231961185428, "grad_norm": 0.09326171875, "learning_rate": 0.0029891556729235626, "loss": 1.3557, "step": 1420 }, { "epoch": 0.12461001138622883, "grad_norm": 0.0791015625, "learning_rate": 0.002989104041454625, "loss": 1.3099, "step": 1421 }, { "epoch": 0.12469770316060337, "grad_norm": 0.15625, "learning_rate": 0.002989052287862856, "loss": 1.3465, "step": 1422 }, { "epoch": 0.12478539493497792, "grad_norm": 0.1796875, "learning_rate": 0.002989000412152975, "loss": 1.3353, "step": 1423 }, { "epoch": 0.12487308670935246, "grad_norm": 0.0908203125, "learning_rate": 0.0029889484143297124, "loss": 1.318, "step": 1424 }, { "epoch": 0.12496077848372701, "grad_norm": 0.169921875, "learning_rate": 0.0029888962943978113, "loss": 1.2814, "step": 1425 }, { "epoch": 0.12504847025810156, "grad_norm": 0.09130859375, "learning_rate": 0.002988844052362024, "loss": 1.3305, "step": 1426 }, { "epoch": 0.1251361620324761, "grad_norm": 0.2099609375, "learning_rate": 0.002988791688227116, "loss": 1.3228, "step": 1427 }, { "epoch": 0.12522385380685064, "grad_norm": 0.130859375, "learning_rate": 0.002988739201997862, "loss": 1.3273, "step": 1428 }, { "epoch": 0.1253115455812252, "grad_norm": 0.1318359375, "learning_rate": 0.002988686593679048, "loss": 1.2191, "step": 1429 }, { "epoch": 0.12539923735559974, "grad_norm": 0.0986328125, "learning_rate": 0.0029886338632754733, "loss": 1.2992, "step": 1430 }, { "epoch": 0.12548692912997428, "grad_norm": 0.08642578125, "learning_rate": 0.0029885810107919456, "loss": 1.3225, "step": 1431 }, { "epoch": 0.12557462090434882, "grad_norm": 0.0830078125, "learning_rate": 0.0029885280362332853, "loss": 1.3324, "step": 1432 }, { "epoch": 0.12566231267872338, "grad_norm": 0.09423828125, "learning_rate": 0.0029884749396043237, "loss": 1.3346, "step": 1433 }, { "epoch": 0.12575000445309792, "grad_norm": 0.080078125, "learning_rate": 0.0029884217209099023, "loss": 1.2389, "step": 1434 }, { "epoch": 0.12583769622747246, "grad_norm": 0.1279296875, "learning_rate": 0.0029883683801548754, "loss": 1.3179, "step": 1435 }, { "epoch": 0.12592538800184702, "grad_norm": 0.1640625, "learning_rate": 0.0029883149173441075, "loss": 1.2994, "step": 1436 }, { "epoch": 0.12601307977622156, "grad_norm": 0.08349609375, "learning_rate": 0.0029882613324824737, "loss": 1.2993, "step": 1437 }, { "epoch": 0.1261007715505961, "grad_norm": 0.2734375, "learning_rate": 0.002988207625574861, "loss": 1.2902, "step": 1438 }, { "epoch": 0.12618846332497063, "grad_norm": 0.302734375, "learning_rate": 0.0029881537966261673, "loss": 1.295, "step": 1439 }, { "epoch": 0.1262761550993452, "grad_norm": 0.0703125, "learning_rate": 0.0029880998456413023, "loss": 1.3192, "step": 1440 }, { "epoch": 0.12636384687371974, "grad_norm": 0.220703125, "learning_rate": 0.002988045772625185, "loss": 1.2559, "step": 1441 }, { "epoch": 0.12645153864809427, "grad_norm": 0.09814453125, "learning_rate": 0.0029879915775827483, "loss": 1.2742, "step": 1442 }, { "epoch": 0.1265392304224688, "grad_norm": 0.1591796875, "learning_rate": 0.002987937260518933, "loss": 1.3096, "step": 1443 }, { "epoch": 0.12662692219684338, "grad_norm": 0.11181640625, "learning_rate": 0.0029878828214386934, "loss": 1.3017, "step": 1444 }, { "epoch": 0.12671461397121792, "grad_norm": 0.0908203125, "learning_rate": 0.0029878282603469945, "loss": 1.3181, "step": 1445 }, { "epoch": 0.12680230574559245, "grad_norm": 0.08251953125, "learning_rate": 0.0029877735772488117, "loss": 1.3489, "step": 1446 }, { "epoch": 0.12688999751996702, "grad_norm": 0.11865234375, "learning_rate": 0.0029877187721491323, "loss": 1.2814, "step": 1447 }, { "epoch": 0.12697768929434156, "grad_norm": 0.0859375, "learning_rate": 0.002987663845052954, "loss": 1.2852, "step": 1448 }, { "epoch": 0.1270653810687161, "grad_norm": 0.14453125, "learning_rate": 0.002987608795965286, "loss": 1.3262, "step": 1449 }, { "epoch": 0.12715307284309063, "grad_norm": 0.1337890625, "learning_rate": 0.002987553624891149, "loss": 1.312, "step": 1450 }, { "epoch": 0.1272407646174652, "grad_norm": 0.10302734375, "learning_rate": 0.0029874983318355745, "loss": 1.334, "step": 1451 }, { "epoch": 0.12732845639183973, "grad_norm": 0.19921875, "learning_rate": 0.0029874429168036047, "loss": 1.3343, "step": 1452 }, { "epoch": 0.12741614816621427, "grad_norm": 1.8125, "learning_rate": 0.0029873873798002934, "loss": 1.3057, "step": 1453 }, { "epoch": 0.1275038399405888, "grad_norm": 0.11279296875, "learning_rate": 0.0029873317208307056, "loss": 1.35, "step": 1454 }, { "epoch": 0.12759153171496337, "grad_norm": 0.2578125, "learning_rate": 0.002987275939899917, "loss": 1.3545, "step": 1455 }, { "epoch": 0.1276792234893379, "grad_norm": 0.357421875, "learning_rate": 0.002987220037013015, "loss": 1.2747, "step": 1456 }, { "epoch": 0.12776691526371245, "grad_norm": 0.154296875, "learning_rate": 0.002987164012175098, "loss": 1.3802, "step": 1457 }, { "epoch": 0.12785460703808701, "grad_norm": 0.12255859375, "learning_rate": 0.0029871078653912744, "loss": 1.3492, "step": 1458 }, { "epoch": 0.12794229881246155, "grad_norm": 0.1982421875, "learning_rate": 0.0029870515966666654, "loss": 1.3027, "step": 1459 }, { "epoch": 0.1280299905868361, "grad_norm": 0.220703125, "learning_rate": 0.002986995206006402, "loss": 1.3125, "step": 1460 }, { "epoch": 0.12811768236121063, "grad_norm": 0.203125, "learning_rate": 0.0029869386934156276, "loss": 1.2989, "step": 1461 }, { "epoch": 0.1282053741355852, "grad_norm": 0.12353515625, "learning_rate": 0.0029868820588994957, "loss": 1.2363, "step": 1462 }, { "epoch": 0.12829306590995973, "grad_norm": 0.1201171875, "learning_rate": 0.0029868253024631715, "loss": 1.3255, "step": 1463 }, { "epoch": 0.12838075768433427, "grad_norm": 0.28125, "learning_rate": 0.00298676842411183, "loss": 1.317, "step": 1464 }, { "epoch": 0.12846844945870883, "grad_norm": 0.11474609375, "learning_rate": 0.00298671142385066, "loss": 1.2981, "step": 1465 }, { "epoch": 0.12855614123308337, "grad_norm": 0.267578125, "learning_rate": 0.0029866543016848577, "loss": 1.3597, "step": 1466 }, { "epoch": 0.1286438330074579, "grad_norm": 0.1376953125, "learning_rate": 0.002986597057619634, "loss": 1.2674, "step": 1467 }, { "epoch": 0.12873152478183245, "grad_norm": 0.1806640625, "learning_rate": 0.0029865396916602094, "loss": 1.3773, "step": 1468 }, { "epoch": 0.128819216556207, "grad_norm": 0.201171875, "learning_rate": 0.002986482203811815, "loss": 1.331, "step": 1469 }, { "epoch": 0.12890690833058155, "grad_norm": 0.1240234375, "learning_rate": 0.002986424594079694, "loss": 1.3114, "step": 1470 }, { "epoch": 0.1289946001049561, "grad_norm": 0.2060546875, "learning_rate": 0.0029863668624690995, "loss": 1.3031, "step": 1471 }, { "epoch": 0.12908229187933062, "grad_norm": 0.365234375, "learning_rate": 0.0029863090089852972, "loss": 1.2871, "step": 1472 }, { "epoch": 0.1291699836537052, "grad_norm": 0.10888671875, "learning_rate": 0.0029862510336335635, "loss": 1.3051, "step": 1473 }, { "epoch": 0.12925767542807973, "grad_norm": 0.185546875, "learning_rate": 0.002986192936419184, "loss": 1.3369, "step": 1474 }, { "epoch": 0.12934536720245426, "grad_norm": 0.0810546875, "learning_rate": 0.0029861347173474584, "loss": 1.3769, "step": 1475 }, { "epoch": 0.12943305897682883, "grad_norm": 0.1572265625, "learning_rate": 0.0029860763764236963, "loss": 1.3722, "step": 1476 }, { "epoch": 0.12952075075120337, "grad_norm": 0.1611328125, "learning_rate": 0.0029860179136532174, "loss": 1.3819, "step": 1477 }, { "epoch": 0.1296084425255779, "grad_norm": 0.08154296875, "learning_rate": 0.0029859593290413535, "loss": 1.3006, "step": 1478 }, { "epoch": 0.12969613429995244, "grad_norm": 0.1298828125, "learning_rate": 0.002985900622593448, "loss": 1.2738, "step": 1479 }, { "epoch": 0.129783826074327, "grad_norm": 0.07373046875, "learning_rate": 0.0029858417943148534, "loss": 1.3179, "step": 1480 }, { "epoch": 0.12987151784870155, "grad_norm": 0.072265625, "learning_rate": 0.0029857828442109366, "loss": 1.2314, "step": 1481 }, { "epoch": 0.12995920962307608, "grad_norm": 0.10791015625, "learning_rate": 0.0029857237722870724, "loss": 1.3601, "step": 1482 }, { "epoch": 0.13004690139745065, "grad_norm": 0.0654296875, "learning_rate": 0.002985664578548648, "loss": 1.2792, "step": 1483 }, { "epoch": 0.13013459317182519, "grad_norm": 0.09375, "learning_rate": 0.0029856052630010625, "loss": 1.2767, "step": 1484 }, { "epoch": 0.13022228494619972, "grad_norm": 0.07861328125, "learning_rate": 0.002985545825649725, "loss": 1.336, "step": 1485 }, { "epoch": 0.13030997672057426, "grad_norm": 0.08544921875, "learning_rate": 0.0029854862665000554, "loss": 1.3138, "step": 1486 }, { "epoch": 0.13039766849494883, "grad_norm": 0.12255859375, "learning_rate": 0.002985426585557486, "loss": 1.252, "step": 1487 }, { "epoch": 0.13048536026932336, "grad_norm": 0.07177734375, "learning_rate": 0.0029853667828274593, "loss": 1.2989, "step": 1488 }, { "epoch": 0.1305730520436979, "grad_norm": 0.546875, "learning_rate": 0.0029853068583154292, "loss": 1.3, "step": 1489 }, { "epoch": 0.13066074381807244, "grad_norm": 0.08349609375, "learning_rate": 0.0029852468120268607, "loss": 1.3237, "step": 1490 }, { "epoch": 0.130748435592447, "grad_norm": 0.1494140625, "learning_rate": 0.00298518664396723, "loss": 1.3524, "step": 1491 }, { "epoch": 0.13083612736682154, "grad_norm": 0.1220703125, "learning_rate": 0.0029851263541420246, "loss": 1.3396, "step": 1492 }, { "epoch": 0.13092381914119608, "grad_norm": 0.08349609375, "learning_rate": 0.0029850659425567413, "loss": 1.3305, "step": 1493 }, { "epoch": 0.13101151091557064, "grad_norm": 0.181640625, "learning_rate": 0.0029850054092168915, "loss": 1.3134, "step": 1494 }, { "epoch": 0.13109920268994518, "grad_norm": 0.0966796875, "learning_rate": 0.002984944754127994, "loss": 1.3054, "step": 1495 }, { "epoch": 0.13118689446431972, "grad_norm": 0.1484375, "learning_rate": 0.0029848839772955815, "loss": 1.2187, "step": 1496 }, { "epoch": 0.13127458623869426, "grad_norm": 0.158203125, "learning_rate": 0.002984823078725196, "loss": 1.3308, "step": 1497 }, { "epoch": 0.13136227801306882, "grad_norm": 0.08349609375, "learning_rate": 0.0029847620584223917, "loss": 1.2468, "step": 1498 }, { "epoch": 0.13144996978744336, "grad_norm": 0.09521484375, "learning_rate": 0.0029847009163927333, "loss": 1.2942, "step": 1499 }, { "epoch": 0.1315376615618179, "grad_norm": 0.083984375, "learning_rate": 0.0029846396526417964, "loss": 1.2777, "step": 1500 }, { "epoch": 0.1315376615618179, "eval_loss": 1.3182919025421143, "eval_runtime": 429.272, "eval_samples_per_second": 33.655, "eval_steps_per_second": 8.414, "step": 1500 }, { "epoch": 0.13162535333619246, "grad_norm": 0.076171875, "learning_rate": 0.002984578267175169, "loss": 1.284, "step": 1501 }, { "epoch": 0.131713045110567, "grad_norm": 0.08447265625, "learning_rate": 0.0029845167599984487, "loss": 1.3167, "step": 1502 }, { "epoch": 0.13180073688494154, "grad_norm": 0.0791015625, "learning_rate": 0.002984455131117245, "loss": 1.2918, "step": 1503 }, { "epoch": 0.13188842865931608, "grad_norm": 0.091796875, "learning_rate": 0.0029843933805371783, "loss": 1.2734, "step": 1504 }, { "epoch": 0.13197612043369064, "grad_norm": 0.0712890625, "learning_rate": 0.00298433150826388, "loss": 1.3092, "step": 1505 }, { "epoch": 0.13206381220806518, "grad_norm": 0.1318359375, "learning_rate": 0.0029842695143029925, "loss": 1.2861, "step": 1506 }, { "epoch": 0.13215150398243972, "grad_norm": 0.2353515625, "learning_rate": 0.0029842073986601696, "loss": 1.2986, "step": 1507 }, { "epoch": 0.13223919575681425, "grad_norm": 0.1552734375, "learning_rate": 0.0029841451613410765, "loss": 1.3754, "step": 1508 }, { "epoch": 0.13232688753118882, "grad_norm": 0.11962890625, "learning_rate": 0.0029840828023513888, "loss": 1.2943, "step": 1509 }, { "epoch": 0.13241457930556336, "grad_norm": 0.1328125, "learning_rate": 0.0029840203216967933, "loss": 1.256, "step": 1510 }, { "epoch": 0.1325022710799379, "grad_norm": 0.1259765625, "learning_rate": 0.002983957719382988, "loss": 1.3142, "step": 1511 }, { "epoch": 0.13258996285431246, "grad_norm": 0.10498046875, "learning_rate": 0.0029838949954156826, "loss": 1.3331, "step": 1512 }, { "epoch": 0.132677654628687, "grad_norm": 0.166015625, "learning_rate": 0.002983832149800597, "loss": 1.3206, "step": 1513 }, { "epoch": 0.13276534640306153, "grad_norm": 0.1533203125, "learning_rate": 0.0029837691825434624, "loss": 1.4007, "step": 1514 }, { "epoch": 0.13285303817743607, "grad_norm": 0.0830078125, "learning_rate": 0.0029837060936500214, "loss": 1.3109, "step": 1515 }, { "epoch": 0.13294072995181064, "grad_norm": 0.1357421875, "learning_rate": 0.002983642883126028, "loss": 1.3233, "step": 1516 }, { "epoch": 0.13302842172618518, "grad_norm": 0.07373046875, "learning_rate": 0.002983579550977246, "loss": 1.3065, "step": 1517 }, { "epoch": 0.1331161135005597, "grad_norm": 0.08154296875, "learning_rate": 0.002983516097209452, "loss": 1.3287, "step": 1518 }, { "epoch": 0.13320380527493428, "grad_norm": 0.076171875, "learning_rate": 0.002983452521828432, "loss": 1.3336, "step": 1519 }, { "epoch": 0.13329149704930882, "grad_norm": 0.06298828125, "learning_rate": 0.0029833888248399845, "loss": 1.3169, "step": 1520 }, { "epoch": 0.13337918882368335, "grad_norm": 0.08349609375, "learning_rate": 0.0029833250062499188, "loss": 1.3375, "step": 1521 }, { "epoch": 0.1334668805980579, "grad_norm": 0.06591796875, "learning_rate": 0.0029832610660640536, "loss": 1.3951, "step": 1522 }, { "epoch": 0.13355457237243246, "grad_norm": 0.107421875, "learning_rate": 0.0029831970042882216, "loss": 1.2699, "step": 1523 }, { "epoch": 0.133642264146807, "grad_norm": 0.130859375, "learning_rate": 0.0029831328209282645, "loss": 1.2956, "step": 1524 }, { "epoch": 0.13372995592118153, "grad_norm": 0.08154296875, "learning_rate": 0.0029830685159900347, "loss": 1.3326, "step": 1525 }, { "epoch": 0.13381764769555607, "grad_norm": 0.1787109375, "learning_rate": 0.0029830040894793983, "loss": 1.3339, "step": 1526 }, { "epoch": 0.13390533946993063, "grad_norm": 0.3203125, "learning_rate": 0.0029829395414022303, "loss": 1.3356, "step": 1527 }, { "epoch": 0.13399303124430517, "grad_norm": 0.1953125, "learning_rate": 0.0029828748717644167, "loss": 1.3342, "step": 1528 }, { "epoch": 0.1340807230186797, "grad_norm": 0.0869140625, "learning_rate": 0.0029828100805718554, "loss": 1.2823, "step": 1529 }, { "epoch": 0.13416841479305427, "grad_norm": 0.1572265625, "learning_rate": 0.0029827451678304555, "loss": 1.2867, "step": 1530 }, { "epoch": 0.1342561065674288, "grad_norm": 0.10302734375, "learning_rate": 0.002982680133546137, "loss": 1.3465, "step": 1531 }, { "epoch": 0.13434379834180335, "grad_norm": 0.1484375, "learning_rate": 0.0029826149777248305, "loss": 1.2621, "step": 1532 }, { "epoch": 0.1344314901161779, "grad_norm": 0.0908203125, "learning_rate": 0.002982549700372478, "loss": 1.3645, "step": 1533 }, { "epoch": 0.13451918189055245, "grad_norm": 0.16015625, "learning_rate": 0.0029824843014950326, "loss": 1.2433, "step": 1534 }, { "epoch": 0.134606873664927, "grad_norm": 0.07470703125, "learning_rate": 0.002982418781098459, "loss": 1.3254, "step": 1535 }, { "epoch": 0.13469456543930153, "grad_norm": 0.1513671875, "learning_rate": 0.0029823531391887322, "loss": 1.3154, "step": 1536 }, { "epoch": 0.1347822572136761, "grad_norm": 0.07666015625, "learning_rate": 0.0029822873757718387, "loss": 1.3446, "step": 1537 }, { "epoch": 0.13486994898805063, "grad_norm": 0.138671875, "learning_rate": 0.0029822214908537758, "loss": 1.2785, "step": 1538 }, { "epoch": 0.13495764076242517, "grad_norm": 0.07958984375, "learning_rate": 0.0029821554844405517, "loss": 1.3284, "step": 1539 }, { "epoch": 0.1350453325367997, "grad_norm": 0.1728515625, "learning_rate": 0.002982089356538187, "loss": 1.3399, "step": 1540 }, { "epoch": 0.13513302431117427, "grad_norm": 0.11279296875, "learning_rate": 0.002982023107152711, "loss": 1.3512, "step": 1541 }, { "epoch": 0.1352207160855488, "grad_norm": 0.11767578125, "learning_rate": 0.0029819567362901664, "loss": 1.3023, "step": 1542 }, { "epoch": 0.13530840785992335, "grad_norm": 0.0888671875, "learning_rate": 0.002981890243956606, "loss": 1.3128, "step": 1543 }, { "epoch": 0.13539609963429788, "grad_norm": 0.107421875, "learning_rate": 0.002981823630158094, "loss": 1.3158, "step": 1544 }, { "epoch": 0.13548379140867245, "grad_norm": 0.0966796875, "learning_rate": 0.0029817568949007047, "loss": 1.3173, "step": 1545 }, { "epoch": 0.135571483183047, "grad_norm": 0.08935546875, "learning_rate": 0.002981690038190524, "loss": 1.302, "step": 1546 }, { "epoch": 0.13565917495742152, "grad_norm": 0.0830078125, "learning_rate": 0.0029816230600336504, "loss": 1.2854, "step": 1547 }, { "epoch": 0.1357468667317961, "grad_norm": 0.08447265625, "learning_rate": 0.0029815559604361905, "loss": 1.2837, "step": 1548 }, { "epoch": 0.13583455850617063, "grad_norm": 0.1435546875, "learning_rate": 0.002981488739404265, "loss": 1.2879, "step": 1549 }, { "epoch": 0.13592225028054516, "grad_norm": 0.09033203125, "learning_rate": 0.0029814213969440034, "loss": 1.3034, "step": 1550 }, { "epoch": 0.1360099420549197, "grad_norm": 0.1357421875, "learning_rate": 0.0029813539330615477, "loss": 1.3412, "step": 1551 }, { "epoch": 0.13609763382929427, "grad_norm": 0.08203125, "learning_rate": 0.0029812863477630502, "loss": 1.2951, "step": 1552 }, { "epoch": 0.1361853256036688, "grad_norm": 0.126953125, "learning_rate": 0.002981218641054674, "loss": 1.2204, "step": 1553 }, { "epoch": 0.13627301737804334, "grad_norm": 0.10107421875, "learning_rate": 0.002981150812942595, "loss": 1.2347, "step": 1554 }, { "epoch": 0.1363607091524179, "grad_norm": 0.08935546875, "learning_rate": 0.0029810828634329973, "loss": 1.2983, "step": 1555 }, { "epoch": 0.13644840092679245, "grad_norm": 0.06689453125, "learning_rate": 0.002981014792532079, "loss": 1.3449, "step": 1556 }, { "epoch": 0.13653609270116698, "grad_norm": 0.10302734375, "learning_rate": 0.0029809466002460477, "loss": 1.3148, "step": 1557 }, { "epoch": 0.13662378447554152, "grad_norm": 0.0654296875, "learning_rate": 0.0029808782865811223, "loss": 1.3352, "step": 1558 }, { "epoch": 0.1367114762499161, "grad_norm": 0.2041015625, "learning_rate": 0.002980809851543533, "loss": 1.4218, "step": 1559 }, { "epoch": 0.13679916802429062, "grad_norm": 0.134765625, "learning_rate": 0.0029807412951395203, "loss": 1.3049, "step": 1560 }, { "epoch": 0.13688685979866516, "grad_norm": 0.11767578125, "learning_rate": 0.0029806726173753372, "loss": 1.3568, "step": 1561 }, { "epoch": 0.1369745515730397, "grad_norm": 0.0810546875, "learning_rate": 0.0029806038182572463, "loss": 1.3048, "step": 1562 }, { "epoch": 0.13706224334741426, "grad_norm": 0.09716796875, "learning_rate": 0.0029805348977915216, "loss": 1.3356, "step": 1563 }, { "epoch": 0.1371499351217888, "grad_norm": 0.06396484375, "learning_rate": 0.00298046585598445, "loss": 1.296, "step": 1564 }, { "epoch": 0.13723762689616334, "grad_norm": 0.1240234375, "learning_rate": 0.0029803966928423262, "loss": 1.2985, "step": 1565 }, { "epoch": 0.1373253186705379, "grad_norm": 0.08837890625, "learning_rate": 0.0029803274083714582, "loss": 1.2839, "step": 1566 }, { "epoch": 0.13741301044491244, "grad_norm": 0.095703125, "learning_rate": 0.0029802580025781655, "loss": 1.3112, "step": 1567 }, { "epoch": 0.13750070221928698, "grad_norm": 0.07275390625, "learning_rate": 0.0029801884754687767, "loss": 1.3728, "step": 1568 }, { "epoch": 0.13758839399366152, "grad_norm": 0.07177734375, "learning_rate": 0.0029801188270496327, "loss": 1.3091, "step": 1569 }, { "epoch": 0.13767608576803608, "grad_norm": 0.07666015625, "learning_rate": 0.002980049057327086, "loss": 1.2518, "step": 1570 }, { "epoch": 0.13776377754241062, "grad_norm": 0.10888671875, "learning_rate": 0.002979979166307498, "loss": 1.3197, "step": 1571 }, { "epoch": 0.13785146931678516, "grad_norm": 0.0849609375, "learning_rate": 0.002979909153997243, "loss": 1.3024, "step": 1572 }, { "epoch": 0.13793916109115972, "grad_norm": 0.0859375, "learning_rate": 0.002979839020402707, "loss": 1.2366, "step": 1573 }, { "epoch": 0.13802685286553426, "grad_norm": 0.08154296875, "learning_rate": 0.0029797687655302853, "loss": 1.2927, "step": 1574 }, { "epoch": 0.1381145446399088, "grad_norm": 0.0791015625, "learning_rate": 0.002979698389386385, "loss": 1.3856, "step": 1575 }, { "epoch": 0.13820223641428334, "grad_norm": 0.08251953125, "learning_rate": 0.002979627891977424, "loss": 1.3791, "step": 1576 }, { "epoch": 0.1382899281886579, "grad_norm": 0.11474609375, "learning_rate": 0.0029795572733098317, "loss": 1.2658, "step": 1577 }, { "epoch": 0.13837761996303244, "grad_norm": 0.062255859375, "learning_rate": 0.002979486533390048, "loss": 1.3256, "step": 1578 }, { "epoch": 0.13846531173740698, "grad_norm": 0.1689453125, "learning_rate": 0.002979415672224525, "loss": 1.2792, "step": 1579 }, { "epoch": 0.13855300351178151, "grad_norm": 0.0751953125, "learning_rate": 0.0029793446898197244, "loss": 1.2801, "step": 1580 }, { "epoch": 0.13864069528615608, "grad_norm": 0.1875, "learning_rate": 0.00297927358618212, "loss": 1.3045, "step": 1581 }, { "epoch": 0.13872838706053062, "grad_norm": 0.1728515625, "learning_rate": 0.0029792023613181957, "loss": 1.2642, "step": 1582 }, { "epoch": 0.13881607883490515, "grad_norm": 0.10302734375, "learning_rate": 0.0029791310152344473, "loss": 1.2261, "step": 1583 }, { "epoch": 0.13890377060927972, "grad_norm": 0.142578125, "learning_rate": 0.0029790595479373813, "loss": 1.3044, "step": 1584 }, { "epoch": 0.13899146238365426, "grad_norm": 0.16015625, "learning_rate": 0.0029789879594335164, "loss": 1.3165, "step": 1585 }, { "epoch": 0.1390791541580288, "grad_norm": 0.0859375, "learning_rate": 0.0029789162497293794, "loss": 1.2949, "step": 1586 }, { "epoch": 0.13916684593240333, "grad_norm": 0.10205078125, "learning_rate": 0.002978844418831511, "loss": 1.3131, "step": 1587 }, { "epoch": 0.1392545377067779, "grad_norm": 0.23046875, "learning_rate": 0.0029787724667464624, "loss": 1.3262, "step": 1588 }, { "epoch": 0.13934222948115244, "grad_norm": 0.1728515625, "learning_rate": 0.002978700393480795, "loss": 1.217, "step": 1589 }, { "epoch": 0.13942992125552697, "grad_norm": 0.1904296875, "learning_rate": 0.0029786281990410823, "loss": 1.3964, "step": 1590 }, { "epoch": 0.13951761302990154, "grad_norm": 0.30078125, "learning_rate": 0.0029785558834339067, "loss": 1.3288, "step": 1591 }, { "epoch": 0.13960530480427608, "grad_norm": 0.0791015625, "learning_rate": 0.002978483446665865, "loss": 1.2713, "step": 1592 }, { "epoch": 0.1396929965786506, "grad_norm": 0.296875, "learning_rate": 0.0029784108887435617, "loss": 1.3323, "step": 1593 }, { "epoch": 0.13978068835302515, "grad_norm": 0.08154296875, "learning_rate": 0.0029783382096736145, "loss": 1.3132, "step": 1594 }, { "epoch": 0.13986838012739972, "grad_norm": 0.341796875, "learning_rate": 0.0029782654094626525, "loss": 1.3063, "step": 1595 }, { "epoch": 0.13995607190177425, "grad_norm": 0.10107421875, "learning_rate": 0.0029781924881173137, "loss": 1.3055, "step": 1596 }, { "epoch": 0.1400437636761488, "grad_norm": 0.29296875, "learning_rate": 0.0029781194456442485, "loss": 1.3568, "step": 1597 }, { "epoch": 0.14013145545052333, "grad_norm": 0.1416015625, "learning_rate": 0.0029780462820501188, "loss": 1.2725, "step": 1598 }, { "epoch": 0.1402191472248979, "grad_norm": 0.232421875, "learning_rate": 0.0029779729973415958, "loss": 1.2852, "step": 1599 }, { "epoch": 0.14030683899927243, "grad_norm": 0.23046875, "learning_rate": 0.0029778995915253643, "loss": 1.3203, "step": 1600 }, { "epoch": 0.14039453077364697, "grad_norm": 0.11962890625, "learning_rate": 0.002977826064608118, "loss": 1.2179, "step": 1601 }, { "epoch": 0.14048222254802153, "grad_norm": 0.1240234375, "learning_rate": 0.002977752416596562, "loss": 1.238, "step": 1602 }, { "epoch": 0.14056991432239607, "grad_norm": 0.1630859375, "learning_rate": 0.002977678647497413, "loss": 1.3133, "step": 1603 }, { "epoch": 0.1406576060967706, "grad_norm": 0.0791015625, "learning_rate": 0.0029776047573173993, "loss": 1.3257, "step": 1604 }, { "epoch": 0.14074529787114515, "grad_norm": 0.19140625, "learning_rate": 0.0029775307460632584, "loss": 1.289, "step": 1605 }, { "epoch": 0.1408329896455197, "grad_norm": 0.09423828125, "learning_rate": 0.002977456613741741, "loss": 1.3245, "step": 1606 }, { "epoch": 0.14092068141989425, "grad_norm": 0.1630859375, "learning_rate": 0.002977382360359607, "loss": 1.3248, "step": 1607 }, { "epoch": 0.1410083731942688, "grad_norm": 0.078125, "learning_rate": 0.002977307985923628, "loss": 1.3047, "step": 1608 }, { "epoch": 0.14109606496864335, "grad_norm": 0.1787109375, "learning_rate": 0.0029772334904405876, "loss": 1.2525, "step": 1609 }, { "epoch": 0.1411837567430179, "grad_norm": 0.07470703125, "learning_rate": 0.002977158873917279, "loss": 1.3219, "step": 1610 }, { "epoch": 0.14127144851739243, "grad_norm": 0.1484375, "learning_rate": 0.0029770841363605067, "loss": 1.2669, "step": 1611 }, { "epoch": 0.14135914029176697, "grad_norm": 0.1064453125, "learning_rate": 0.0029770092777770874, "loss": 1.3024, "step": 1612 }, { "epoch": 0.14144683206614153, "grad_norm": 0.10791015625, "learning_rate": 0.002976934298173848, "loss": 1.3109, "step": 1613 }, { "epoch": 0.14153452384051607, "grad_norm": 0.1298828125, "learning_rate": 0.0029768591975576253, "loss": 1.2855, "step": 1614 }, { "epoch": 0.1416222156148906, "grad_norm": 0.12451171875, "learning_rate": 0.0029767839759352694, "loss": 1.2878, "step": 1615 }, { "epoch": 0.14170990738926514, "grad_norm": 0.06982421875, "learning_rate": 0.0029767086333136405, "loss": 1.3303, "step": 1616 }, { "epoch": 0.1417975991636397, "grad_norm": 0.13671875, "learning_rate": 0.0029766331696996083, "loss": 1.3565, "step": 1617 }, { "epoch": 0.14188529093801425, "grad_norm": 0.11767578125, "learning_rate": 0.002976557585100056, "loss": 1.3557, "step": 1618 }, { "epoch": 0.14197298271238878, "grad_norm": 0.1015625, "learning_rate": 0.0029764818795218763, "loss": 1.3797, "step": 1619 }, { "epoch": 0.14206067448676335, "grad_norm": 0.2060546875, "learning_rate": 0.002976406052971974, "loss": 1.3364, "step": 1620 }, { "epoch": 0.1421483662611379, "grad_norm": 0.08154296875, "learning_rate": 0.0029763301054572633, "loss": 1.3215, "step": 1621 }, { "epoch": 0.14223605803551242, "grad_norm": 0.154296875, "learning_rate": 0.0029762540369846708, "loss": 1.2655, "step": 1622 }, { "epoch": 0.14232374980988696, "grad_norm": 0.09228515625, "learning_rate": 0.002976177847561134, "loss": 1.2315, "step": 1623 }, { "epoch": 0.14241144158426153, "grad_norm": 0.1396484375, "learning_rate": 0.0029761015371936013, "loss": 1.315, "step": 1624 }, { "epoch": 0.14249913335863607, "grad_norm": 0.09619140625, "learning_rate": 0.0029760251058890312, "loss": 1.3761, "step": 1625 }, { "epoch": 0.1425868251330106, "grad_norm": 0.171875, "learning_rate": 0.002975948553654395, "loss": 1.3818, "step": 1626 }, { "epoch": 0.14267451690738517, "grad_norm": 0.09716796875, "learning_rate": 0.002975871880496673, "loss": 1.2746, "step": 1627 }, { "epoch": 0.1427622086817597, "grad_norm": 0.150390625, "learning_rate": 0.002975795086422859, "loss": 1.2724, "step": 1628 }, { "epoch": 0.14284990045613424, "grad_norm": 0.0634765625, "learning_rate": 0.002975718171439955, "loss": 1.2987, "step": 1629 }, { "epoch": 0.14293759223050878, "grad_norm": 0.1376953125, "learning_rate": 0.002975641135554977, "loss": 1.303, "step": 1630 }, { "epoch": 0.14302528400488335, "grad_norm": 0.08642578125, "learning_rate": 0.0029755639787749488, "loss": 1.3062, "step": 1631 }, { "epoch": 0.14311297577925788, "grad_norm": 0.09423828125, "learning_rate": 0.0029754867011069076, "loss": 1.2757, "step": 1632 }, { "epoch": 0.14320066755363242, "grad_norm": 0.07275390625, "learning_rate": 0.0029754093025579015, "loss": 1.2948, "step": 1633 }, { "epoch": 0.14328835932800696, "grad_norm": 0.138671875, "learning_rate": 0.002975331783134988, "loss": 1.2779, "step": 1634 }, { "epoch": 0.14337605110238152, "grad_norm": 0.0927734375, "learning_rate": 0.0029752541428452375, "loss": 1.254, "step": 1635 }, { "epoch": 0.14346374287675606, "grad_norm": 0.09375, "learning_rate": 0.0029751763816957305, "loss": 1.2744, "step": 1636 }, { "epoch": 0.1435514346511306, "grad_norm": 0.0888671875, "learning_rate": 0.002975098499693558, "loss": 1.3138, "step": 1637 }, { "epoch": 0.14363912642550516, "grad_norm": 0.0673828125, "learning_rate": 0.0029750204968458233, "loss": 1.3148, "step": 1638 }, { "epoch": 0.1437268181998797, "grad_norm": 0.09619140625, "learning_rate": 0.0029749423731596394, "loss": 1.2637, "step": 1639 }, { "epoch": 0.14381450997425424, "grad_norm": 0.0732421875, "learning_rate": 0.002974864128642132, "loss": 1.273, "step": 1640 }, { "epoch": 0.14390220174862878, "grad_norm": 0.072265625, "learning_rate": 0.002974785763300436, "loss": 1.3235, "step": 1641 }, { "epoch": 0.14398989352300334, "grad_norm": 0.1044921875, "learning_rate": 0.002974707277141698, "loss": 1.2963, "step": 1642 }, { "epoch": 0.14407758529737788, "grad_norm": 0.083984375, "learning_rate": 0.0029746286701730763, "loss": 1.3017, "step": 1643 }, { "epoch": 0.14416527707175242, "grad_norm": 0.0693359375, "learning_rate": 0.0029745499424017395, "loss": 1.3372, "step": 1644 }, { "epoch": 0.14425296884612698, "grad_norm": 0.142578125, "learning_rate": 0.002974471093834867, "loss": 1.3172, "step": 1645 }, { "epoch": 0.14434066062050152, "grad_norm": 0.1494140625, "learning_rate": 0.00297439212447965, "loss": 1.3033, "step": 1646 }, { "epoch": 0.14442835239487606, "grad_norm": 0.09130859375, "learning_rate": 0.00297431303434329, "loss": 1.3191, "step": 1647 }, { "epoch": 0.1445160441692506, "grad_norm": 0.1689453125, "learning_rate": 0.002974233823433, "loss": 1.326, "step": 1648 }, { "epoch": 0.14460373594362516, "grad_norm": 0.1513671875, "learning_rate": 0.002974154491756004, "loss": 1.2827, "step": 1649 }, { "epoch": 0.1446914277179997, "grad_norm": 0.076171875, "learning_rate": 0.002974075039319536, "loss": 1.249, "step": 1650 }, { "epoch": 0.14477911949237424, "grad_norm": 0.10693359375, "learning_rate": 0.0029739954661308437, "loss": 1.2419, "step": 1651 }, { "epoch": 0.14486681126674877, "grad_norm": 0.08251953125, "learning_rate": 0.002973915772197182, "loss": 1.2723, "step": 1652 }, { "epoch": 0.14495450304112334, "grad_norm": 0.07763671875, "learning_rate": 0.0029738359575258192, "loss": 1.333, "step": 1653 }, { "epoch": 0.14504219481549788, "grad_norm": 0.06689453125, "learning_rate": 0.002973756022124035, "loss": 1.308, "step": 1654 }, { "epoch": 0.14512988658987241, "grad_norm": 0.062255859375, "learning_rate": 0.0029736759659991186, "loss": 1.3056, "step": 1655 }, { "epoch": 0.14521757836424698, "grad_norm": 0.078125, "learning_rate": 0.0029735957891583713, "loss": 1.2666, "step": 1656 }, { "epoch": 0.14530527013862152, "grad_norm": 0.0927734375, "learning_rate": 0.0029735154916091053, "loss": 1.3374, "step": 1657 }, { "epoch": 0.14539296191299605, "grad_norm": 0.0830078125, "learning_rate": 0.002973435073358643, "loss": 1.335, "step": 1658 }, { "epoch": 0.1454806536873706, "grad_norm": 0.1328125, "learning_rate": 0.0029733545344143176, "loss": 1.3012, "step": 1659 }, { "epoch": 0.14556834546174516, "grad_norm": 0.10595703125, "learning_rate": 0.002973273874783475, "loss": 1.2851, "step": 1660 }, { "epoch": 0.1456560372361197, "grad_norm": 0.07958984375, "learning_rate": 0.0029731930944734723, "loss": 1.32, "step": 1661 }, { "epoch": 0.14574372901049423, "grad_norm": 0.06787109375, "learning_rate": 0.0029731121934916736, "loss": 1.3265, "step": 1662 }, { "epoch": 0.14583142078486877, "grad_norm": 0.07373046875, "learning_rate": 0.0029730311718454594, "loss": 1.4071, "step": 1663 }, { "epoch": 0.14591911255924334, "grad_norm": 0.1181640625, "learning_rate": 0.002972950029542218, "loss": 1.3511, "step": 1664 }, { "epoch": 0.14600680433361787, "grad_norm": 0.279296875, "learning_rate": 0.002972868766589348, "loss": 1.3421, "step": 1665 }, { "epoch": 0.1460944961079924, "grad_norm": 0.31640625, "learning_rate": 0.0029727873829942623, "loss": 1.343, "step": 1666 }, { "epoch": 0.14618218788236698, "grad_norm": 0.07568359375, "learning_rate": 0.002972705878764382, "loss": 1.325, "step": 1667 }, { "epoch": 0.1462698796567415, "grad_norm": 0.2216796875, "learning_rate": 0.0029726242539071398, "loss": 1.2319, "step": 1668 }, { "epoch": 0.14635757143111605, "grad_norm": 0.091796875, "learning_rate": 0.0029725425084299803, "loss": 1.2526, "step": 1669 }, { "epoch": 0.1464452632054906, "grad_norm": 0.25390625, "learning_rate": 0.0029724606423403577, "loss": 1.3712, "step": 1670 }, { "epoch": 0.14653295497986515, "grad_norm": 0.10107421875, "learning_rate": 0.0029723786556457386, "loss": 1.2874, "step": 1671 }, { "epoch": 0.1466206467542397, "grad_norm": 0.20703125, "learning_rate": 0.0029722965483535996, "loss": 1.2487, "step": 1672 }, { "epoch": 0.14670833852861423, "grad_norm": 0.091796875, "learning_rate": 0.0029722143204714293, "loss": 1.3243, "step": 1673 }, { "epoch": 0.1467960303029888, "grad_norm": 0.1982421875, "learning_rate": 0.0029721319720067262, "loss": 1.314, "step": 1674 }, { "epoch": 0.14688372207736333, "grad_norm": 0.10888671875, "learning_rate": 0.002972049502967, "loss": 1.3093, "step": 1675 }, { "epoch": 0.14697141385173787, "grad_norm": 0.1455078125, "learning_rate": 0.0029719669133597723, "loss": 1.3873, "step": 1676 }, { "epoch": 0.1470591056261124, "grad_norm": 0.13671875, "learning_rate": 0.002971884203192575, "loss": 1.3275, "step": 1677 }, { "epoch": 0.14714679740048697, "grad_norm": 0.07568359375, "learning_rate": 0.00297180137247295, "loss": 1.3174, "step": 1678 }, { "epoch": 0.1472344891748615, "grad_norm": 0.1328125, "learning_rate": 0.002971718421208453, "loss": 1.295, "step": 1679 }, { "epoch": 0.14732218094923605, "grad_norm": 0.150390625, "learning_rate": 0.002971635349406647, "loss": 1.2753, "step": 1680 }, { "epoch": 0.14740987272361059, "grad_norm": 0.0849609375, "learning_rate": 0.0029715521570751096, "loss": 1.3296, "step": 1681 }, { "epoch": 0.14749756449798515, "grad_norm": 0.1259765625, "learning_rate": 0.002971468844221427, "loss": 1.2714, "step": 1682 }, { "epoch": 0.1475852562723597, "grad_norm": 0.09912109375, "learning_rate": 0.0029713854108531965, "loss": 1.2962, "step": 1683 }, { "epoch": 0.14767294804673423, "grad_norm": 0.08203125, "learning_rate": 0.0029713018569780284, "loss": 1.2805, "step": 1684 }, { "epoch": 0.1477606398211088, "grad_norm": 0.08642578125, "learning_rate": 0.0029712181826035415, "loss": 1.2831, "step": 1685 }, { "epoch": 0.14784833159548333, "grad_norm": 0.0751953125, "learning_rate": 0.0029711343877373672, "loss": 1.295, "step": 1686 }, { "epoch": 0.14793602336985787, "grad_norm": 0.0947265625, "learning_rate": 0.0029710504723871474, "loss": 1.2647, "step": 1687 }, { "epoch": 0.1480237151442324, "grad_norm": 0.0732421875, "learning_rate": 0.0029709664365605345, "loss": 1.3441, "step": 1688 }, { "epoch": 0.14811140691860697, "grad_norm": 0.1904296875, "learning_rate": 0.0029708822802651928, "loss": 1.3064, "step": 1689 }, { "epoch": 0.1481990986929815, "grad_norm": 0.08349609375, "learning_rate": 0.0029707980035087967, "loss": 1.2509, "step": 1690 }, { "epoch": 0.14828679046735604, "grad_norm": 0.224609375, "learning_rate": 0.002970713606299033, "loss": 1.2616, "step": 1691 }, { "epoch": 0.1483744822417306, "grad_norm": 0.1474609375, "learning_rate": 0.002970629088643597, "loss": 1.3159, "step": 1692 }, { "epoch": 0.14846217401610515, "grad_norm": 0.11083984375, "learning_rate": 0.0029705444505501977, "loss": 1.3307, "step": 1693 }, { "epoch": 0.14854986579047968, "grad_norm": 0.1416015625, "learning_rate": 0.0029704596920265536, "loss": 1.2977, "step": 1694 }, { "epoch": 0.14863755756485422, "grad_norm": 0.09619140625, "learning_rate": 0.0029703748130803943, "loss": 1.2808, "step": 1695 }, { "epoch": 0.1487252493392288, "grad_norm": 0.0908203125, "learning_rate": 0.0029702898137194604, "loss": 1.2882, "step": 1696 }, { "epoch": 0.14881294111360333, "grad_norm": 0.09423828125, "learning_rate": 0.0029702046939515036, "loss": 1.3025, "step": 1697 }, { "epoch": 0.14890063288797786, "grad_norm": 0.06640625, "learning_rate": 0.002970119453784287, "loss": 1.3931, "step": 1698 }, { "epoch": 0.1489883246623524, "grad_norm": 0.10986328125, "learning_rate": 0.0029700340932255842, "loss": 1.2934, "step": 1699 }, { "epoch": 0.14907601643672697, "grad_norm": 0.09130859375, "learning_rate": 0.0029699486122831795, "loss": 1.354, "step": 1700 }, { "epoch": 0.1491637082111015, "grad_norm": 0.1708984375, "learning_rate": 0.002969863010964869, "loss": 1.277, "step": 1701 }, { "epoch": 0.14925139998547604, "grad_norm": 0.1318359375, "learning_rate": 0.002969777289278459, "loss": 1.2748, "step": 1702 }, { "epoch": 0.1493390917598506, "grad_norm": 0.09912109375, "learning_rate": 0.0029696914472317672, "loss": 1.332, "step": 1703 }, { "epoch": 0.14942678353422514, "grad_norm": 0.1201171875, "learning_rate": 0.0029696054848326226, "loss": 1.3173, "step": 1704 }, { "epoch": 0.14951447530859968, "grad_norm": 0.1435546875, "learning_rate": 0.002969519402088864, "loss": 1.3306, "step": 1705 }, { "epoch": 0.14960216708297422, "grad_norm": 0.14453125, "learning_rate": 0.002969433199008342, "loss": 1.2826, "step": 1706 }, { "epoch": 0.14968985885734878, "grad_norm": 0.0888671875, "learning_rate": 0.0029693468755989188, "loss": 1.3347, "step": 1707 }, { "epoch": 0.14977755063172332, "grad_norm": 0.10693359375, "learning_rate": 0.002969260431868466, "loss": 1.2668, "step": 1708 }, { "epoch": 0.14986524240609786, "grad_norm": 0.142578125, "learning_rate": 0.002969173867824868, "loss": 1.2978, "step": 1709 }, { "epoch": 0.14995293418047242, "grad_norm": 0.12255859375, "learning_rate": 0.002969087183476018, "loss": 1.3336, "step": 1710 }, { "epoch": 0.15004062595484696, "grad_norm": 0.0732421875, "learning_rate": 0.0029690003788298224, "loss": 1.3322, "step": 1711 }, { "epoch": 0.1501283177292215, "grad_norm": 0.0625, "learning_rate": 0.002968913453894197, "loss": 1.2582, "step": 1712 }, { "epoch": 0.15021600950359604, "grad_norm": 0.126953125, "learning_rate": 0.00296882640867707, "loss": 1.2509, "step": 1713 }, { "epoch": 0.1503037012779706, "grad_norm": 0.0732421875, "learning_rate": 0.0029687392431863788, "loss": 1.3008, "step": 1714 }, { "epoch": 0.15039139305234514, "grad_norm": 0.1826171875, "learning_rate": 0.0029686519574300724, "loss": 1.4032, "step": 1715 }, { "epoch": 0.15047908482671968, "grad_norm": 0.1005859375, "learning_rate": 0.0029685645514161123, "loss": 1.2226, "step": 1716 }, { "epoch": 0.15056677660109422, "grad_norm": 0.130859375, "learning_rate": 0.0029684770251524684, "loss": 1.258, "step": 1717 }, { "epoch": 0.15065446837546878, "grad_norm": 0.1591796875, "learning_rate": 0.002968389378647124, "loss": 1.3325, "step": 1718 }, { "epoch": 0.15074216014984332, "grad_norm": 0.083984375, "learning_rate": 0.0029683016119080715, "loss": 1.37, "step": 1719 }, { "epoch": 0.15082985192421786, "grad_norm": 0.33203125, "learning_rate": 0.002968213724943315, "loss": 1.3357, "step": 1720 }, { "epoch": 0.15091754369859242, "grad_norm": 0.25, "learning_rate": 0.0029681257177608697, "loss": 1.3055, "step": 1721 }, { "epoch": 0.15100523547296696, "grad_norm": 0.1962890625, "learning_rate": 0.0029680375903687614, "loss": 1.2907, "step": 1722 }, { "epoch": 0.1510929272473415, "grad_norm": 0.26953125, "learning_rate": 0.0029679493427750277, "loss": 1.3671, "step": 1723 }, { "epoch": 0.15118061902171603, "grad_norm": 0.10888671875, "learning_rate": 0.002967860974987716, "loss": 1.3317, "step": 1724 }, { "epoch": 0.1512683107960906, "grad_norm": 0.2392578125, "learning_rate": 0.002967772487014886, "loss": 1.3038, "step": 1725 }, { "epoch": 0.15135600257046514, "grad_norm": 0.061279296875, "learning_rate": 0.0029676838788646066, "loss": 1.3724, "step": 1726 }, { "epoch": 0.15144369434483967, "grad_norm": 0.1279296875, "learning_rate": 0.0029675951505449593, "loss": 1.3427, "step": 1727 }, { "epoch": 0.15153138611921424, "grad_norm": 0.0791015625, "learning_rate": 0.002967506302064035, "loss": 1.3172, "step": 1728 }, { "epoch": 0.15161907789358878, "grad_norm": 0.1015625, "learning_rate": 0.0029674173334299377, "loss": 1.266, "step": 1729 }, { "epoch": 0.15170676966796331, "grad_norm": 0.07177734375, "learning_rate": 0.00296732824465078, "loss": 1.3791, "step": 1730 }, { "epoch": 0.15179446144233785, "grad_norm": 0.0830078125, "learning_rate": 0.0029672390357346873, "loss": 1.2942, "step": 1731 }, { "epoch": 0.15188215321671242, "grad_norm": 0.09375, "learning_rate": 0.002967149706689795, "loss": 1.3361, "step": 1732 }, { "epoch": 0.15196984499108696, "grad_norm": 0.146484375, "learning_rate": 0.00296706025752425, "loss": 1.2497, "step": 1733 }, { "epoch": 0.1520575367654615, "grad_norm": 0.06689453125, "learning_rate": 0.002966970688246209, "loss": 1.2908, "step": 1734 }, { "epoch": 0.15214522853983603, "grad_norm": 0.10009765625, "learning_rate": 0.0029668809988638405, "loss": 1.3164, "step": 1735 }, { "epoch": 0.1522329203142106, "grad_norm": 0.0751953125, "learning_rate": 0.002966791189385325, "loss": 1.2638, "step": 1736 }, { "epoch": 0.15232061208858513, "grad_norm": 0.11376953125, "learning_rate": 0.0029667012598188526, "loss": 1.331, "step": 1737 }, { "epoch": 0.15240830386295967, "grad_norm": 0.07373046875, "learning_rate": 0.0029666112101726237, "loss": 1.2418, "step": 1738 }, { "epoch": 0.15249599563733424, "grad_norm": 0.1279296875, "learning_rate": 0.002966521040454852, "loss": 1.31, "step": 1739 }, { "epoch": 0.15258368741170877, "grad_norm": 0.16015625, "learning_rate": 0.0029664307506737596, "loss": 1.3026, "step": 1740 }, { "epoch": 0.1526713791860833, "grad_norm": 0.11181640625, "learning_rate": 0.0029663403408375813, "loss": 1.2625, "step": 1741 }, { "epoch": 0.15275907096045785, "grad_norm": 0.1318359375, "learning_rate": 0.002966249810954561, "loss": 1.2715, "step": 1742 }, { "epoch": 0.15284676273483241, "grad_norm": 0.1142578125, "learning_rate": 0.002966159161032957, "loss": 1.2995, "step": 1743 }, { "epoch": 0.15293445450920695, "grad_norm": 0.109375, "learning_rate": 0.002966068391081035, "loss": 1.2396, "step": 1744 }, { "epoch": 0.1530221462835815, "grad_norm": 0.119140625, "learning_rate": 0.002965977501107073, "loss": 1.2447, "step": 1745 }, { "epoch": 0.15310983805795605, "grad_norm": 0.099609375, "learning_rate": 0.0029658864911193596, "loss": 1.3183, "step": 1746 }, { "epoch": 0.1531975298323306, "grad_norm": 0.06201171875, "learning_rate": 0.0029657953611261956, "loss": 1.2936, "step": 1747 }, { "epoch": 0.15328522160670513, "grad_norm": 0.1025390625, "learning_rate": 0.0029657041111358917, "loss": 1.2693, "step": 1748 }, { "epoch": 0.15337291338107967, "grad_norm": 0.0771484375, "learning_rate": 0.0029656127411567686, "loss": 1.2768, "step": 1749 }, { "epoch": 0.15346060515545423, "grad_norm": 0.08447265625, "learning_rate": 0.0029655212511971603, "loss": 1.2926, "step": 1750 }, { "epoch": 0.15354829692982877, "grad_norm": 0.1416015625, "learning_rate": 0.0029654296412654096, "loss": 1.3025, "step": 1751 }, { "epoch": 0.1536359887042033, "grad_norm": 0.1630859375, "learning_rate": 0.0029653379113698717, "loss": 1.3208, "step": 1752 }, { "epoch": 0.15372368047857785, "grad_norm": 0.10888671875, "learning_rate": 0.0029652460615189114, "loss": 1.2135, "step": 1753 }, { "epoch": 0.1538113722529524, "grad_norm": 0.142578125, "learning_rate": 0.002965154091720906, "loss": 1.3043, "step": 1754 }, { "epoch": 0.15389906402732695, "grad_norm": 0.10498046875, "learning_rate": 0.002965062001984242, "loss": 1.3056, "step": 1755 }, { "epoch": 0.15398675580170149, "grad_norm": 0.29296875, "learning_rate": 0.0029649697923173185, "loss": 1.3145, "step": 1756 }, { "epoch": 0.15407444757607605, "grad_norm": 0.171875, "learning_rate": 0.0029648774627285446, "loss": 1.2702, "step": 1757 }, { "epoch": 0.1541621393504506, "grad_norm": 0.302734375, "learning_rate": 0.0029647850132263403, "loss": 1.387, "step": 1758 }, { "epoch": 0.15424983112482513, "grad_norm": 0.453125, "learning_rate": 0.002964692443819137, "loss": 1.3019, "step": 1759 }, { "epoch": 0.15433752289919966, "grad_norm": 0.1552734375, "learning_rate": 0.002964599754515377, "loss": 1.2936, "step": 1760 }, { "epoch": 0.15442521467357423, "grad_norm": 0.478515625, "learning_rate": 0.002964506945323512, "loss": 1.3148, "step": 1761 }, { "epoch": 0.15451290644794877, "grad_norm": 0.1806640625, "learning_rate": 0.002964414016252008, "loss": 1.271, "step": 1762 }, { "epoch": 0.1546005982223233, "grad_norm": 0.392578125, "learning_rate": 0.0029643209673093384, "loss": 1.3235, "step": 1763 }, { "epoch": 0.15468828999669787, "grad_norm": 0.1396484375, "learning_rate": 0.0029642277985039893, "loss": 1.3947, "step": 1764 }, { "epoch": 0.1547759817710724, "grad_norm": 0.267578125, "learning_rate": 0.0029641345098444585, "loss": 1.2814, "step": 1765 }, { "epoch": 0.15486367354544694, "grad_norm": 0.12890625, "learning_rate": 0.002964041101339252, "loss": 1.3002, "step": 1766 }, { "epoch": 0.15495136531982148, "grad_norm": 0.16015625, "learning_rate": 0.0029639475729968893, "loss": 1.2534, "step": 1767 }, { "epoch": 0.15503905709419605, "grad_norm": 0.1796875, "learning_rate": 0.0029638539248259, "loss": 1.3228, "step": 1768 }, { "epoch": 0.15512674886857059, "grad_norm": 0.08837890625, "learning_rate": 0.002963760156834825, "loss": 1.2834, "step": 1769 }, { "epoch": 0.15521444064294512, "grad_norm": 0.2001953125, "learning_rate": 0.0029636662690322143, "loss": 1.3193, "step": 1770 }, { "epoch": 0.15530213241731966, "grad_norm": 0.072265625, "learning_rate": 0.0029635722614266315, "loss": 1.3285, "step": 1771 }, { "epoch": 0.15538982419169423, "grad_norm": 0.1123046875, "learning_rate": 0.002963478134026649, "loss": 1.3342, "step": 1772 }, { "epoch": 0.15547751596606876, "grad_norm": 0.12060546875, "learning_rate": 0.002963383886840852, "loss": 1.2917, "step": 1773 }, { "epoch": 0.1555652077404433, "grad_norm": 0.1015625, "learning_rate": 0.002963289519877835, "loss": 1.2716, "step": 1774 }, { "epoch": 0.15565289951481787, "grad_norm": 0.10986328125, "learning_rate": 0.0029631950331462037, "loss": 1.3703, "step": 1775 }, { "epoch": 0.1557405912891924, "grad_norm": 0.109375, "learning_rate": 0.0029631004266545756, "loss": 1.2962, "step": 1776 }, { "epoch": 0.15582828306356694, "grad_norm": 0.10986328125, "learning_rate": 0.002963005700411578, "loss": 1.2938, "step": 1777 }, { "epoch": 0.15591597483794148, "grad_norm": 0.14453125, "learning_rate": 0.00296291085442585, "loss": 1.3536, "step": 1778 }, { "epoch": 0.15600366661231604, "grad_norm": 0.08984375, "learning_rate": 0.0029628158887060416, "loss": 1.2311, "step": 1779 }, { "epoch": 0.15609135838669058, "grad_norm": 0.13671875, "learning_rate": 0.002962720803260813, "loss": 1.2521, "step": 1780 }, { "epoch": 0.15617905016106512, "grad_norm": 0.1181640625, "learning_rate": 0.0029626255980988365, "loss": 1.2764, "step": 1781 }, { "epoch": 0.15626674193543968, "grad_norm": 0.08740234375, "learning_rate": 0.0029625302732287934, "loss": 1.2357, "step": 1782 }, { "epoch": 0.15635443370981422, "grad_norm": 0.076171875, "learning_rate": 0.0029624348286593776, "loss": 1.3649, "step": 1783 }, { "epoch": 0.15644212548418876, "grad_norm": 0.10693359375, "learning_rate": 0.0029623392643992937, "loss": 1.3346, "step": 1784 }, { "epoch": 0.1565298172585633, "grad_norm": 0.07373046875, "learning_rate": 0.0029622435804572563, "loss": 1.3027, "step": 1785 }, { "epoch": 0.15661750903293786, "grad_norm": 0.1484375, "learning_rate": 0.0029621477768419927, "loss": 1.3065, "step": 1786 }, { "epoch": 0.1567052008073124, "grad_norm": 0.1064453125, "learning_rate": 0.002962051853562238, "loss": 1.2124, "step": 1787 }, { "epoch": 0.15679289258168694, "grad_norm": 0.11962890625, "learning_rate": 0.0029619558106267424, "loss": 1.3163, "step": 1788 }, { "epoch": 0.15688058435606148, "grad_norm": 0.142578125, "learning_rate": 0.0029618596480442635, "loss": 1.266, "step": 1789 }, { "epoch": 0.15696827613043604, "grad_norm": 0.1201171875, "learning_rate": 0.0029617633658235707, "loss": 1.3783, "step": 1790 }, { "epoch": 0.15705596790481058, "grad_norm": 0.2177734375, "learning_rate": 0.0029616669639734457, "loss": 1.3268, "step": 1791 }, { "epoch": 0.15714365967918512, "grad_norm": 0.083984375, "learning_rate": 0.002961570442502679, "loss": 1.2861, "step": 1792 }, { "epoch": 0.15723135145355968, "grad_norm": 0.09716796875, "learning_rate": 0.0029614738014200745, "loss": 1.3748, "step": 1793 }, { "epoch": 0.15731904322793422, "grad_norm": 0.0859375, "learning_rate": 0.0029613770407344447, "loss": 1.2581, "step": 1794 }, { "epoch": 0.15740673500230876, "grad_norm": 0.07763671875, "learning_rate": 0.002961280160454614, "loss": 1.2899, "step": 1795 }, { "epoch": 0.1574944267766833, "grad_norm": 0.07373046875, "learning_rate": 0.0029611831605894172, "loss": 1.2457, "step": 1796 }, { "epoch": 0.15758211855105786, "grad_norm": 0.0830078125, "learning_rate": 0.0029610860411477015, "loss": 1.3766, "step": 1797 }, { "epoch": 0.1576698103254324, "grad_norm": 0.06494140625, "learning_rate": 0.0029609888021383235, "loss": 1.3176, "step": 1798 }, { "epoch": 0.15775750209980693, "grad_norm": 0.0869140625, "learning_rate": 0.002960891443570151, "loss": 1.2908, "step": 1799 }, { "epoch": 0.1578451938741815, "grad_norm": 0.07470703125, "learning_rate": 0.0029607939654520627, "loss": 1.305, "step": 1800 }, { "epoch": 0.15793288564855604, "grad_norm": 0.1005859375, "learning_rate": 0.0029606963677929485, "loss": 1.2922, "step": 1801 }, { "epoch": 0.15802057742293057, "grad_norm": 0.11572265625, "learning_rate": 0.0029605986506017093, "loss": 1.2961, "step": 1802 }, { "epoch": 0.1581082691973051, "grad_norm": 0.11328125, "learning_rate": 0.0029605008138872562, "loss": 1.2114, "step": 1803 }, { "epoch": 0.15819596097167968, "grad_norm": 0.126953125, "learning_rate": 0.0029604028576585124, "loss": 1.2436, "step": 1804 }, { "epoch": 0.15828365274605422, "grad_norm": 0.1298828125, "learning_rate": 0.00296030478192441, "loss": 1.3183, "step": 1805 }, { "epoch": 0.15837134452042875, "grad_norm": 0.171875, "learning_rate": 0.002960206586693895, "loss": 1.2487, "step": 1806 }, { "epoch": 0.1584590362948033, "grad_norm": 0.0703125, "learning_rate": 0.002960108271975921, "loss": 1.3022, "step": 1807 }, { "epoch": 0.15854672806917786, "grad_norm": 0.158203125, "learning_rate": 0.0029600098377794543, "loss": 1.3295, "step": 1808 }, { "epoch": 0.1586344198435524, "grad_norm": 0.1748046875, "learning_rate": 0.0029599112841134723, "loss": 1.297, "step": 1809 }, { "epoch": 0.15872211161792693, "grad_norm": 0.0966796875, "learning_rate": 0.0029598126109869633, "loss": 1.3147, "step": 1810 }, { "epoch": 0.1588098033923015, "grad_norm": 0.1435546875, "learning_rate": 0.002959713818408925, "loss": 1.2913, "step": 1811 }, { "epoch": 0.15889749516667603, "grad_norm": 0.2255859375, "learning_rate": 0.0029596149063883677, "loss": 1.233, "step": 1812 }, { "epoch": 0.15898518694105057, "grad_norm": 0.0927734375, "learning_rate": 0.0029595158749343114, "loss": 1.2982, "step": 1813 }, { "epoch": 0.1590728787154251, "grad_norm": 0.2412109375, "learning_rate": 0.0029594167240557883, "loss": 1.3088, "step": 1814 }, { "epoch": 0.15916057048979967, "grad_norm": 0.11572265625, "learning_rate": 0.0029593174537618392, "loss": 1.3154, "step": 1815 }, { "epoch": 0.1592482622641742, "grad_norm": 0.169921875, "learning_rate": 0.0029592180640615195, "loss": 1.2972, "step": 1816 }, { "epoch": 0.15933595403854875, "grad_norm": 0.1650390625, "learning_rate": 0.0029591185549638914, "loss": 1.32, "step": 1817 }, { "epoch": 0.15942364581292331, "grad_norm": 0.111328125, "learning_rate": 0.002959018926478031, "loss": 1.2938, "step": 1818 }, { "epoch": 0.15951133758729785, "grad_norm": 0.09375, "learning_rate": 0.002958919178613023, "loss": 1.2777, "step": 1819 }, { "epoch": 0.1595990293616724, "grad_norm": 0.08251953125, "learning_rate": 0.002958819311377965, "loss": 1.3409, "step": 1820 }, { "epoch": 0.15968672113604693, "grad_norm": 0.080078125, "learning_rate": 0.002958719324781965, "loss": 1.2738, "step": 1821 }, { "epoch": 0.1597744129104215, "grad_norm": 0.0869140625, "learning_rate": 0.002958619218834141, "loss": 1.2636, "step": 1822 }, { "epoch": 0.15986210468479603, "grad_norm": 0.10888671875, "learning_rate": 0.002958518993543622, "loss": 1.3311, "step": 1823 }, { "epoch": 0.15994979645917057, "grad_norm": 0.06640625, "learning_rate": 0.002958418648919549, "loss": 1.3638, "step": 1824 }, { "epoch": 0.1600374882335451, "grad_norm": 0.09130859375, "learning_rate": 0.0029583181849710733, "loss": 1.3066, "step": 1825 }, { "epoch": 0.16012518000791967, "grad_norm": 0.0634765625, "learning_rate": 0.0029582176017073558, "loss": 1.3149, "step": 1826 }, { "epoch": 0.1602128717822942, "grad_norm": 0.07080078125, "learning_rate": 0.00295811689913757, "loss": 1.2287, "step": 1827 }, { "epoch": 0.16030056355666875, "grad_norm": 0.0859375, "learning_rate": 0.002958016077270901, "loss": 1.2944, "step": 1828 }, { "epoch": 0.1603882553310433, "grad_norm": 0.15625, "learning_rate": 0.0029579151361165414, "loss": 1.3327, "step": 1829 }, { "epoch": 0.16047594710541785, "grad_norm": 0.1083984375, "learning_rate": 0.0029578140756836985, "loss": 1.2067, "step": 1830 }, { "epoch": 0.1605636388797924, "grad_norm": 0.12060546875, "learning_rate": 0.0029577128959815875, "loss": 1.2926, "step": 1831 }, { "epoch": 0.16065133065416692, "grad_norm": 0.17578125, "learning_rate": 0.0029576115970194362, "loss": 1.3344, "step": 1832 }, { "epoch": 0.1607390224285415, "grad_norm": 0.07177734375, "learning_rate": 0.0029575101788064826, "loss": 1.2922, "step": 1833 }, { "epoch": 0.16082671420291603, "grad_norm": 0.171875, "learning_rate": 0.0029574086413519766, "loss": 1.2902, "step": 1834 }, { "epoch": 0.16091440597729056, "grad_norm": 0.068359375, "learning_rate": 0.0029573069846651773, "loss": 1.3136, "step": 1835 }, { "epoch": 0.16100209775166513, "grad_norm": 0.1337890625, "learning_rate": 0.002957205208755356, "loss": 1.1953, "step": 1836 }, { "epoch": 0.16108978952603967, "grad_norm": 0.06982421875, "learning_rate": 0.0029571033136317937, "loss": 1.3136, "step": 1837 }, { "epoch": 0.1611774813004142, "grad_norm": 0.2138671875, "learning_rate": 0.002957001299303784, "loss": 1.2416, "step": 1838 }, { "epoch": 0.16126517307478874, "grad_norm": 0.1201171875, "learning_rate": 0.0029568991657806295, "loss": 1.2981, "step": 1839 }, { "epoch": 0.1613528648491633, "grad_norm": 0.11669921875, "learning_rate": 0.002956796913071645, "loss": 1.273, "step": 1840 }, { "epoch": 0.16144055662353785, "grad_norm": 0.06298828125, "learning_rate": 0.002956694541186155, "loss": 1.2522, "step": 1841 }, { "epoch": 0.16152824839791238, "grad_norm": 0.08984375, "learning_rate": 0.002956592050133497, "loss": 1.2785, "step": 1842 }, { "epoch": 0.16161594017228692, "grad_norm": 0.0869140625, "learning_rate": 0.0029564894399230165, "loss": 1.2929, "step": 1843 }, { "epoch": 0.16170363194666149, "grad_norm": 0.1103515625, "learning_rate": 0.0029563867105640716, "loss": 1.3277, "step": 1844 }, { "epoch": 0.16179132372103602, "grad_norm": 0.08251953125, "learning_rate": 0.002956283862066031, "loss": 1.2834, "step": 1845 }, { "epoch": 0.16187901549541056, "grad_norm": 0.162109375, "learning_rate": 0.0029561808944382744, "loss": 1.24, "step": 1846 }, { "epoch": 0.16196670726978513, "grad_norm": 0.083984375, "learning_rate": 0.0029560778076901926, "loss": 1.3224, "step": 1847 }, { "epoch": 0.16205439904415966, "grad_norm": 0.07421875, "learning_rate": 0.0029559746018311857, "loss": 1.296, "step": 1848 }, { "epoch": 0.1621420908185342, "grad_norm": 0.08154296875, "learning_rate": 0.002955871276870667, "loss": 1.3479, "step": 1849 }, { "epoch": 0.16222978259290874, "grad_norm": 0.07177734375, "learning_rate": 0.0029557678328180586, "loss": 1.3016, "step": 1850 }, { "epoch": 0.1623174743672833, "grad_norm": 0.0849609375, "learning_rate": 0.002955664269682795, "loss": 1.3169, "step": 1851 }, { "epoch": 0.16240516614165784, "grad_norm": 0.107421875, "learning_rate": 0.0029555605874743204, "loss": 1.2505, "step": 1852 }, { "epoch": 0.16249285791603238, "grad_norm": 0.09375, "learning_rate": 0.0029554567862020904, "loss": 1.3084, "step": 1853 }, { "epoch": 0.16258054969040694, "grad_norm": 0.162109375, "learning_rate": 0.002955352865875572, "loss": 1.3385, "step": 1854 }, { "epoch": 0.16266824146478148, "grad_norm": 0.115234375, "learning_rate": 0.002955248826504241, "loss": 1.3753, "step": 1855 }, { "epoch": 0.16275593323915602, "grad_norm": 0.11962890625, "learning_rate": 0.002955144668097588, "loss": 1.2402, "step": 1856 }, { "epoch": 0.16284362501353056, "grad_norm": 0.1552734375, "learning_rate": 0.0029550403906651087, "loss": 1.2476, "step": 1857 }, { "epoch": 0.16293131678790512, "grad_norm": 0.0712890625, "learning_rate": 0.0029549359942163157, "loss": 1.2843, "step": 1858 }, { "epoch": 0.16301900856227966, "grad_norm": 0.12890625, "learning_rate": 0.0029548314787607288, "loss": 1.2993, "step": 1859 }, { "epoch": 0.1631067003366542, "grad_norm": 0.12890625, "learning_rate": 0.002954726844307879, "loss": 1.3391, "step": 1860 }, { "epoch": 0.16319439211102874, "grad_norm": 0.1259765625, "learning_rate": 0.0029546220908673094, "loss": 1.319, "step": 1861 }, { "epoch": 0.1632820838854033, "grad_norm": 0.07568359375, "learning_rate": 0.0029545172184485733, "loss": 1.2632, "step": 1862 }, { "epoch": 0.16336977565977784, "grad_norm": 0.09033203125, "learning_rate": 0.002954412227061234, "loss": 1.2358, "step": 1863 }, { "epoch": 0.16345746743415238, "grad_norm": 0.0869140625, "learning_rate": 0.0029543071167148667, "loss": 1.2597, "step": 1864 }, { "epoch": 0.16354515920852694, "grad_norm": 0.0927734375, "learning_rate": 0.002954201887419058, "loss": 1.2416, "step": 1865 }, { "epoch": 0.16363285098290148, "grad_norm": 0.08544921875, "learning_rate": 0.002954096539183404, "loss": 1.2841, "step": 1866 }, { "epoch": 0.16372054275727602, "grad_norm": 0.087890625, "learning_rate": 0.0029539910720175113, "loss": 1.2665, "step": 1867 }, { "epoch": 0.16380823453165055, "grad_norm": 0.1044921875, "learning_rate": 0.0029538854859309997, "loss": 1.3402, "step": 1868 }, { "epoch": 0.16389592630602512, "grad_norm": 0.0693359375, "learning_rate": 0.0029537797809334977, "loss": 1.2414, "step": 1869 }, { "epoch": 0.16398361808039966, "grad_norm": 0.07275390625, "learning_rate": 0.002953673957034645, "loss": 1.2813, "step": 1870 }, { "epoch": 0.1640713098547742, "grad_norm": 0.0908203125, "learning_rate": 0.0029535680142440935, "loss": 1.2876, "step": 1871 }, { "epoch": 0.16415900162914873, "grad_norm": 0.06298828125, "learning_rate": 0.0029534619525715036, "loss": 1.2672, "step": 1872 }, { "epoch": 0.1642466934035233, "grad_norm": 0.080078125, "learning_rate": 0.0029533557720265494, "loss": 1.3085, "step": 1873 }, { "epoch": 0.16433438517789783, "grad_norm": 0.0927734375, "learning_rate": 0.0029532494726189126, "loss": 1.2638, "step": 1874 }, { "epoch": 0.16442207695227237, "grad_norm": 0.15625, "learning_rate": 0.002953143054358288, "loss": 1.2597, "step": 1875 }, { "epoch": 0.16450976872664694, "grad_norm": 0.09912109375, "learning_rate": 0.0029530365172543817, "loss": 1.2191, "step": 1876 }, { "epoch": 0.16459746050102148, "grad_norm": 0.0830078125, "learning_rate": 0.002952929861316908, "loss": 1.297, "step": 1877 }, { "epoch": 0.164685152275396, "grad_norm": 0.0673828125, "learning_rate": 0.002952823086555595, "loss": 1.2995, "step": 1878 }, { "epoch": 0.16477284404977055, "grad_norm": 0.0595703125, "learning_rate": 0.0029527161929801798, "loss": 1.2785, "step": 1879 }, { "epoch": 0.16486053582414512, "grad_norm": 0.0703125, "learning_rate": 0.00295260918060041, "loss": 1.2669, "step": 1880 }, { "epoch": 0.16494822759851965, "grad_norm": 0.0673828125, "learning_rate": 0.0029525020494260462, "loss": 1.2682, "step": 1881 }, { "epoch": 0.1650359193728942, "grad_norm": 0.056396484375, "learning_rate": 0.0029523947994668583, "loss": 1.3312, "step": 1882 }, { "epoch": 0.16512361114726876, "grad_norm": 0.12890625, "learning_rate": 0.0029522874307326263, "loss": 1.3099, "step": 1883 }, { "epoch": 0.1652113029216433, "grad_norm": 0.16796875, "learning_rate": 0.002952179943233142, "loss": 1.2679, "step": 1884 }, { "epoch": 0.16529899469601783, "grad_norm": 0.07421875, "learning_rate": 0.002952072336978209, "loss": 1.2699, "step": 1885 }, { "epoch": 0.16538668647039237, "grad_norm": 0.10205078125, "learning_rate": 0.00295196461197764, "loss": 1.2717, "step": 1886 }, { "epoch": 0.16547437824476693, "grad_norm": 0.076171875, "learning_rate": 0.0029518567682412593, "loss": 1.2344, "step": 1887 }, { "epoch": 0.16556207001914147, "grad_norm": 0.1044921875, "learning_rate": 0.0029517488057789025, "loss": 1.2496, "step": 1888 }, { "epoch": 0.165649761793516, "grad_norm": 0.1748046875, "learning_rate": 0.002951640724600414, "loss": 1.32, "step": 1889 }, { "epoch": 0.16573745356789055, "grad_norm": 0.158203125, "learning_rate": 0.0029515325247156526, "loss": 1.3256, "step": 1890 }, { "epoch": 0.1658251453422651, "grad_norm": 0.11328125, "learning_rate": 0.0029514242061344847, "loss": 1.3123, "step": 1891 }, { "epoch": 0.16591283711663965, "grad_norm": 0.0751953125, "learning_rate": 0.0029513157688667883, "loss": 1.317, "step": 1892 }, { "epoch": 0.1660005288910142, "grad_norm": 0.12158203125, "learning_rate": 0.002951207212922454, "loss": 1.2483, "step": 1893 }, { "epoch": 0.16608822066538875, "grad_norm": 0.07373046875, "learning_rate": 0.00295109853831138, "loss": 1.3007, "step": 1894 }, { "epoch": 0.1661759124397633, "grad_norm": 0.0625, "learning_rate": 0.0029509897450434785, "loss": 1.2602, "step": 1895 }, { "epoch": 0.16626360421413783, "grad_norm": 0.0751953125, "learning_rate": 0.002950880833128671, "loss": 1.3378, "step": 1896 }, { "epoch": 0.16635129598851237, "grad_norm": 0.06982421875, "learning_rate": 0.0029507718025768895, "loss": 1.3414, "step": 1897 }, { "epoch": 0.16643898776288693, "grad_norm": 0.09814453125, "learning_rate": 0.0029506626533980776, "loss": 1.268, "step": 1898 }, { "epoch": 0.16652667953726147, "grad_norm": 0.08544921875, "learning_rate": 0.002950553385602189, "loss": 1.2873, "step": 1899 }, { "epoch": 0.166614371311636, "grad_norm": 0.07958984375, "learning_rate": 0.002950443999199189, "loss": 1.2687, "step": 1900 }, { "epoch": 0.16670206308601057, "grad_norm": 0.06494140625, "learning_rate": 0.002950334494199054, "loss": 1.307, "step": 1901 }, { "epoch": 0.1667897548603851, "grad_norm": 0.0888671875, "learning_rate": 0.0029502248706117693, "loss": 1.3191, "step": 1902 }, { "epoch": 0.16687744663475965, "grad_norm": 0.0849609375, "learning_rate": 0.002950115128447333, "loss": 1.3759, "step": 1903 }, { "epoch": 0.16696513840913418, "grad_norm": 0.0751953125, "learning_rate": 0.0029500052677157535, "loss": 1.2491, "step": 1904 }, { "epoch": 0.16705283018350875, "grad_norm": 0.06982421875, "learning_rate": 0.0029498952884270493, "loss": 1.2773, "step": 1905 }, { "epoch": 0.1671405219578833, "grad_norm": 0.1337890625, "learning_rate": 0.00294978519059125, "loss": 1.2975, "step": 1906 }, { "epoch": 0.16722821373225782, "grad_norm": 0.1923828125, "learning_rate": 0.0029496749742183976, "loss": 1.3059, "step": 1907 }, { "epoch": 0.16731590550663236, "grad_norm": 0.1884765625, "learning_rate": 0.002949564639318542, "loss": 1.3331, "step": 1908 }, { "epoch": 0.16740359728100693, "grad_norm": 0.0791015625, "learning_rate": 0.0029494541859017465, "loss": 1.2586, "step": 1909 }, { "epoch": 0.16749128905538146, "grad_norm": 0.193359375, "learning_rate": 0.0029493436139780838, "loss": 1.3039, "step": 1910 }, { "epoch": 0.167578980829756, "grad_norm": 0.1044921875, "learning_rate": 0.0029492329235576375, "loss": 1.281, "step": 1911 }, { "epoch": 0.16766667260413057, "grad_norm": 0.0859375, "learning_rate": 0.0029491221146505024, "loss": 1.2381, "step": 1912 }, { "epoch": 0.1677543643785051, "grad_norm": 0.0712890625, "learning_rate": 0.0029490111872667846, "loss": 1.2411, "step": 1913 }, { "epoch": 0.16784205615287964, "grad_norm": 0.1044921875, "learning_rate": 0.0029489001414165997, "loss": 1.2881, "step": 1914 }, { "epoch": 0.16792974792725418, "grad_norm": 0.0830078125, "learning_rate": 0.0029487889771100746, "loss": 1.1976, "step": 1915 }, { "epoch": 0.16801743970162875, "grad_norm": 0.1025390625, "learning_rate": 0.002948677694357348, "loss": 1.2403, "step": 1916 }, { "epoch": 0.16810513147600328, "grad_norm": 0.07861328125, "learning_rate": 0.002948566293168568, "loss": 1.2295, "step": 1917 }, { "epoch": 0.16819282325037782, "grad_norm": 0.1630859375, "learning_rate": 0.0029484547735538946, "loss": 1.3096, "step": 1918 }, { "epoch": 0.1682805150247524, "grad_norm": 0.1337890625, "learning_rate": 0.0029483431355234973, "loss": 1.2894, "step": 1919 }, { "epoch": 0.16836820679912692, "grad_norm": 0.1064453125, "learning_rate": 0.002948231379087558, "loss": 1.3074, "step": 1920 }, { "epoch": 0.16845589857350146, "grad_norm": 0.130859375, "learning_rate": 0.0029481195042562686, "loss": 1.2618, "step": 1921 }, { "epoch": 0.168543590347876, "grad_norm": 0.07666015625, "learning_rate": 0.002948007511039831, "loss": 1.3031, "step": 1922 }, { "epoch": 0.16863128212225056, "grad_norm": 0.1494140625, "learning_rate": 0.002947895399448459, "loss": 1.317, "step": 1923 }, { "epoch": 0.1687189738966251, "grad_norm": 0.125, "learning_rate": 0.0029477831694923774, "loss": 1.2723, "step": 1924 }, { "epoch": 0.16880666567099964, "grad_norm": 0.154296875, "learning_rate": 0.0029476708211818212, "loss": 1.3451, "step": 1925 }, { "epoch": 0.16889435744537418, "grad_norm": 0.146484375, "learning_rate": 0.002947558354527036, "loss": 1.2635, "step": 1926 }, { "epoch": 0.16898204921974874, "grad_norm": 0.1455078125, "learning_rate": 0.002947445769538278, "loss": 1.276, "step": 1927 }, { "epoch": 0.16906974099412328, "grad_norm": 0.1396484375, "learning_rate": 0.0029473330662258155, "loss": 1.3344, "step": 1928 }, { "epoch": 0.16915743276849782, "grad_norm": 0.09130859375, "learning_rate": 0.002947220244599926, "loss": 1.2571, "step": 1929 }, { "epoch": 0.16924512454287238, "grad_norm": 0.06884765625, "learning_rate": 0.0029471073046709, "loss": 1.2501, "step": 1930 }, { "epoch": 0.16933281631724692, "grad_norm": 0.076171875, "learning_rate": 0.0029469942464490353, "loss": 1.2875, "step": 1931 }, { "epoch": 0.16942050809162146, "grad_norm": 0.08837890625, "learning_rate": 0.002946881069944644, "loss": 1.2909, "step": 1932 }, { "epoch": 0.169508199865996, "grad_norm": 0.072265625, "learning_rate": 0.002946767775168047, "loss": 1.3028, "step": 1933 }, { "epoch": 0.16959589164037056, "grad_norm": 0.1044921875, "learning_rate": 0.002946654362129576, "loss": 1.2602, "step": 1934 }, { "epoch": 0.1696835834147451, "grad_norm": 0.1142578125, "learning_rate": 0.0029465408308395748, "loss": 1.319, "step": 1935 }, { "epoch": 0.16977127518911964, "grad_norm": 0.078125, "learning_rate": 0.002946427181308397, "loss": 1.2907, "step": 1936 }, { "epoch": 0.1698589669634942, "grad_norm": 0.11474609375, "learning_rate": 0.0029463134135464066, "loss": 1.2207, "step": 1937 }, { "epoch": 0.16994665873786874, "grad_norm": 0.10546875, "learning_rate": 0.0029461995275639795, "loss": 1.2865, "step": 1938 }, { "epoch": 0.17003435051224328, "grad_norm": 0.1328125, "learning_rate": 0.0029460855233715013, "loss": 1.2687, "step": 1939 }, { "epoch": 0.17012204228661781, "grad_norm": 0.0771484375, "learning_rate": 0.0029459714009793694, "loss": 1.2635, "step": 1940 }, { "epoch": 0.17020973406099238, "grad_norm": 0.1083984375, "learning_rate": 0.0029458571603979914, "loss": 1.3113, "step": 1941 }, { "epoch": 0.17029742583536692, "grad_norm": 0.09130859375, "learning_rate": 0.0029457428016377856, "loss": 1.2634, "step": 1942 }, { "epoch": 0.17038511760974145, "grad_norm": 0.07275390625, "learning_rate": 0.002945628324709181, "loss": 1.3609, "step": 1943 }, { "epoch": 0.170472809384116, "grad_norm": 0.1787109375, "learning_rate": 0.0029455137296226183, "loss": 1.2946, "step": 1944 }, { "epoch": 0.17056050115849056, "grad_norm": 0.111328125, "learning_rate": 0.0029453990163885472, "loss": 1.3569, "step": 1945 }, { "epoch": 0.1706481929328651, "grad_norm": 0.1298828125, "learning_rate": 0.00294528418501743, "loss": 1.2823, "step": 1946 }, { "epoch": 0.17073588470723963, "grad_norm": 0.09716796875, "learning_rate": 0.0029451692355197396, "loss": 1.2662, "step": 1947 }, { "epoch": 0.1708235764816142, "grad_norm": 0.1025390625, "learning_rate": 0.0029450541679059573, "loss": 1.2931, "step": 1948 }, { "epoch": 0.17091126825598874, "grad_norm": 0.134765625, "learning_rate": 0.0029449389821865793, "loss": 1.1969, "step": 1949 }, { "epoch": 0.17099896003036327, "grad_norm": 0.12158203125, "learning_rate": 0.0029448236783721077, "loss": 1.2791, "step": 1950 }, { "epoch": 0.1710866518047378, "grad_norm": 0.07666015625, "learning_rate": 0.00294470825647306, "loss": 1.3965, "step": 1951 }, { "epoch": 0.17117434357911238, "grad_norm": 0.072265625, "learning_rate": 0.0029445927164999616, "loss": 1.3565, "step": 1952 }, { "epoch": 0.1712620353534869, "grad_norm": 0.08154296875, "learning_rate": 0.0029444770584633488, "loss": 1.2791, "step": 1953 }, { "epoch": 0.17134972712786145, "grad_norm": 0.119140625, "learning_rate": 0.0029443612823737706, "loss": 1.2292, "step": 1954 }, { "epoch": 0.17143741890223602, "grad_norm": 0.0732421875, "learning_rate": 0.0029442453882417845, "loss": 1.2841, "step": 1955 }, { "epoch": 0.17152511067661055, "grad_norm": 0.1005859375, "learning_rate": 0.00294412937607796, "loss": 1.2832, "step": 1956 }, { "epoch": 0.1716128024509851, "grad_norm": 0.0712890625, "learning_rate": 0.002944013245892877, "loss": 1.3156, "step": 1957 }, { "epoch": 0.17170049422535963, "grad_norm": 0.21484375, "learning_rate": 0.0029438969976971273, "loss": 1.291, "step": 1958 }, { "epoch": 0.1717881859997342, "grad_norm": 0.11328125, "learning_rate": 0.002943780631501311, "loss": 1.2569, "step": 1959 }, { "epoch": 0.17187587777410873, "grad_norm": 0.177734375, "learning_rate": 0.002943664147316041, "loss": 1.3101, "step": 1960 }, { "epoch": 0.17196356954848327, "grad_norm": 0.1630859375, "learning_rate": 0.0029435475451519404, "loss": 1.2592, "step": 1961 }, { "epoch": 0.1720512613228578, "grad_norm": 0.08642578125, "learning_rate": 0.002943430825019643, "loss": 1.3996, "step": 1962 }, { "epoch": 0.17213895309723237, "grad_norm": 0.13671875, "learning_rate": 0.002943313986929793, "loss": 1.2734, "step": 1963 }, { "epoch": 0.1722266448716069, "grad_norm": 0.07861328125, "learning_rate": 0.0029431970308930466, "loss": 1.3204, "step": 1964 }, { "epoch": 0.17231433664598145, "grad_norm": 0.083984375, "learning_rate": 0.0029430799569200687, "loss": 1.27, "step": 1965 }, { "epoch": 0.172402028420356, "grad_norm": 0.07177734375, "learning_rate": 0.0029429627650215374, "loss": 1.2319, "step": 1966 }, { "epoch": 0.17248972019473055, "grad_norm": 0.076171875, "learning_rate": 0.0029428454552081393, "loss": 1.3078, "step": 1967 }, { "epoch": 0.1725774119691051, "grad_norm": 0.10595703125, "learning_rate": 0.002942728027490574, "loss": 1.2591, "step": 1968 }, { "epoch": 0.17266510374347963, "grad_norm": 0.08642578125, "learning_rate": 0.002942610481879549, "loss": 1.3353, "step": 1969 }, { "epoch": 0.1727527955178542, "grad_norm": 0.10302734375, "learning_rate": 0.0029424928183857846, "loss": 1.3396, "step": 1970 }, { "epoch": 0.17284048729222873, "grad_norm": 0.080078125, "learning_rate": 0.002942375037020012, "loss": 1.3243, "step": 1971 }, { "epoch": 0.17292817906660327, "grad_norm": 0.06884765625, "learning_rate": 0.0029422571377929726, "loss": 1.3106, "step": 1972 }, { "epoch": 0.17301587084097783, "grad_norm": 0.083984375, "learning_rate": 0.0029421391207154184, "loss": 1.2602, "step": 1973 }, { "epoch": 0.17310356261535237, "grad_norm": 0.064453125, "learning_rate": 0.0029420209857981116, "loss": 1.2806, "step": 1974 }, { "epoch": 0.1731912543897269, "grad_norm": 0.08935546875, "learning_rate": 0.0029419027330518255, "loss": 1.2117, "step": 1975 }, { "epoch": 0.17327894616410144, "grad_norm": 0.0751953125, "learning_rate": 0.002941784362487346, "loss": 1.3052, "step": 1976 }, { "epoch": 0.173366637938476, "grad_norm": 0.0771484375, "learning_rate": 0.0029416658741154675, "loss": 1.2397, "step": 1977 }, { "epoch": 0.17345432971285055, "grad_norm": 0.11083984375, "learning_rate": 0.0029415472679469954, "loss": 1.2818, "step": 1978 }, { "epoch": 0.17354202148722508, "grad_norm": 0.11083984375, "learning_rate": 0.0029414285439927465, "loss": 1.2803, "step": 1979 }, { "epoch": 0.17362971326159962, "grad_norm": 0.0654296875, "learning_rate": 0.0029413097022635486, "loss": 1.2976, "step": 1980 }, { "epoch": 0.1737174050359742, "grad_norm": 0.07080078125, "learning_rate": 0.002941190742770239, "loss": 1.3047, "step": 1981 }, { "epoch": 0.17380509681034872, "grad_norm": 0.07421875, "learning_rate": 0.002941071665523667, "loss": 1.2546, "step": 1982 }, { "epoch": 0.17389278858472326, "grad_norm": 0.091796875, "learning_rate": 0.0029409524705346917, "loss": 1.2918, "step": 1983 }, { "epoch": 0.17398048035909783, "grad_norm": 0.16015625, "learning_rate": 0.002940833157814184, "loss": 1.3862, "step": 1984 }, { "epoch": 0.17406817213347237, "grad_norm": 0.09716796875, "learning_rate": 0.0029407137273730244, "loss": 1.3218, "step": 1985 }, { "epoch": 0.1741558639078469, "grad_norm": 0.1884765625, "learning_rate": 0.002940594179222105, "loss": 1.3668, "step": 1986 }, { "epoch": 0.17424355568222144, "grad_norm": 0.23046875, "learning_rate": 0.002940474513372328, "loss": 1.2302, "step": 1987 }, { "epoch": 0.174331247456596, "grad_norm": 0.07421875, "learning_rate": 0.0029403547298346064, "loss": 1.3277, "step": 1988 }, { "epoch": 0.17441893923097054, "grad_norm": 0.2578125, "learning_rate": 0.0029402348286198653, "loss": 1.2634, "step": 1989 }, { "epoch": 0.17450663100534508, "grad_norm": 0.10498046875, "learning_rate": 0.002940114809739038, "loss": 1.2542, "step": 1990 }, { "epoch": 0.17459432277971965, "grad_norm": 0.2099609375, "learning_rate": 0.0029399946732030706, "loss": 1.3228, "step": 1991 }, { "epoch": 0.17468201455409418, "grad_norm": 0.1513671875, "learning_rate": 0.0029398744190229187, "loss": 1.2353, "step": 1992 }, { "epoch": 0.17476970632846872, "grad_norm": 0.09033203125, "learning_rate": 0.00293975404720955, "loss": 1.3104, "step": 1993 }, { "epoch": 0.17485739810284326, "grad_norm": 0.07861328125, "learning_rate": 0.0029396335577739413, "loss": 1.2444, "step": 1994 }, { "epoch": 0.17494508987721782, "grad_norm": 0.095703125, "learning_rate": 0.0029395129507270817, "loss": 1.3507, "step": 1995 }, { "epoch": 0.17503278165159236, "grad_norm": 0.177734375, "learning_rate": 0.00293939222607997, "loss": 1.3499, "step": 1996 }, { "epoch": 0.1751204734259669, "grad_norm": 0.140625, "learning_rate": 0.002939271383843615, "loss": 1.2426, "step": 1997 }, { "epoch": 0.17520816520034144, "grad_norm": 0.130859375, "learning_rate": 0.002939150424029039, "loss": 1.2648, "step": 1998 }, { "epoch": 0.175295856974716, "grad_norm": 0.20703125, "learning_rate": 0.002939029346647272, "loss": 1.3051, "step": 1999 }, { "epoch": 0.17538354874909054, "grad_norm": 0.07568359375, "learning_rate": 0.0029389081517093556, "loss": 1.3173, "step": 2000 }, { "epoch": 0.17538354874909054, "eval_loss": 1.2995423078536987, "eval_runtime": 429.2876, "eval_samples_per_second": 33.653, "eval_steps_per_second": 8.414, "step": 2000 }, { "epoch": 0.17547124052346508, "grad_norm": 0.1845703125, "learning_rate": 0.002938786839226344, "loss": 1.2485, "step": 2001 }, { "epoch": 0.17555893229783964, "grad_norm": 0.06884765625, "learning_rate": 0.002938665409209299, "loss": 1.3304, "step": 2002 }, { "epoch": 0.17564662407221418, "grad_norm": 0.22265625, "learning_rate": 0.0029385438616692954, "loss": 1.3498, "step": 2003 }, { "epoch": 0.17573431584658872, "grad_norm": 0.1376953125, "learning_rate": 0.002938422196617418, "loss": 1.3492, "step": 2004 }, { "epoch": 0.17582200762096326, "grad_norm": 0.1611328125, "learning_rate": 0.0029383004140647625, "loss": 1.3309, "step": 2005 }, { "epoch": 0.17590969939533782, "grad_norm": 0.1259765625, "learning_rate": 0.002938178514022435, "loss": 1.3032, "step": 2006 }, { "epoch": 0.17599739116971236, "grad_norm": 0.14453125, "learning_rate": 0.002938056496501552, "loss": 1.2889, "step": 2007 }, { "epoch": 0.1760850829440869, "grad_norm": 0.1484375, "learning_rate": 0.002937934361513242, "loss": 1.3635, "step": 2008 }, { "epoch": 0.17617277471846146, "grad_norm": 0.103515625, "learning_rate": 0.002937812109068643, "loss": 1.3009, "step": 2009 }, { "epoch": 0.176260466492836, "grad_norm": 0.14453125, "learning_rate": 0.002937689739178904, "loss": 1.3083, "step": 2010 }, { "epoch": 0.17634815826721054, "grad_norm": 0.08984375, "learning_rate": 0.002937567251855185, "loss": 1.336, "step": 2011 }, { "epoch": 0.17643585004158507, "grad_norm": 0.08251953125, "learning_rate": 0.0029374446471086566, "loss": 1.2509, "step": 2012 }, { "epoch": 0.17652354181595964, "grad_norm": 0.08349609375, "learning_rate": 0.0029373219249504996, "loss": 1.2925, "step": 2013 }, { "epoch": 0.17661123359033418, "grad_norm": 0.07958984375, "learning_rate": 0.002937199085391907, "loss": 1.3003, "step": 2014 }, { "epoch": 0.17669892536470871, "grad_norm": 0.1728515625, "learning_rate": 0.00293707612844408, "loss": 1.2699, "step": 2015 }, { "epoch": 0.17678661713908325, "grad_norm": 0.07763671875, "learning_rate": 0.0029369530541182324, "loss": 1.2268, "step": 2016 }, { "epoch": 0.17687430891345782, "grad_norm": 0.11669921875, "learning_rate": 0.0029368298624255895, "loss": 1.3013, "step": 2017 }, { "epoch": 0.17696200068783235, "grad_norm": 0.07373046875, "learning_rate": 0.002936706553377384, "loss": 1.2499, "step": 2018 }, { "epoch": 0.1770496924622069, "grad_norm": 0.0859375, "learning_rate": 0.002936583126984863, "loss": 1.2849, "step": 2019 }, { "epoch": 0.17713738423658146, "grad_norm": 0.1083984375, "learning_rate": 0.002936459583259282, "loss": 1.3292, "step": 2020 }, { "epoch": 0.177225076010956, "grad_norm": 0.07080078125, "learning_rate": 0.002936335922211908, "loss": 1.2678, "step": 2021 }, { "epoch": 0.17731276778533053, "grad_norm": 0.06884765625, "learning_rate": 0.0029362121438540187, "loss": 1.2824, "step": 2022 }, { "epoch": 0.17740045955970507, "grad_norm": 0.08056640625, "learning_rate": 0.002936088248196902, "loss": 1.3131, "step": 2023 }, { "epoch": 0.17748815133407964, "grad_norm": 0.0966796875, "learning_rate": 0.002935964235251857, "loss": 1.329, "step": 2024 }, { "epoch": 0.17757584310845417, "grad_norm": 0.06298828125, "learning_rate": 0.002935840105030194, "loss": 1.2658, "step": 2025 }, { "epoch": 0.1776635348828287, "grad_norm": 0.107421875, "learning_rate": 0.0029357158575432324, "loss": 1.3242, "step": 2026 }, { "epoch": 0.17775122665720328, "grad_norm": 0.09326171875, "learning_rate": 0.0029355914928023037, "loss": 1.3222, "step": 2027 }, { "epoch": 0.1778389184315778, "grad_norm": 0.1015625, "learning_rate": 0.0029354670108187497, "loss": 1.2682, "step": 2028 }, { "epoch": 0.17792661020595235, "grad_norm": 0.06494140625, "learning_rate": 0.0029353424116039225, "loss": 1.2478, "step": 2029 }, { "epoch": 0.1780143019803269, "grad_norm": 0.095703125, "learning_rate": 0.0029352176951691856, "loss": 1.3001, "step": 2030 }, { "epoch": 0.17810199375470145, "grad_norm": 0.11767578125, "learning_rate": 0.0029350928615259125, "loss": 1.2986, "step": 2031 }, { "epoch": 0.178189685529076, "grad_norm": 0.07080078125, "learning_rate": 0.002934967910685488, "loss": 1.3619, "step": 2032 }, { "epoch": 0.17827737730345053, "grad_norm": 0.0654296875, "learning_rate": 0.002934842842659307, "loss": 1.2932, "step": 2033 }, { "epoch": 0.17836506907782507, "grad_norm": 0.0693359375, "learning_rate": 0.002934717657458776, "loss": 1.2643, "step": 2034 }, { "epoch": 0.17845276085219963, "grad_norm": 0.06494140625, "learning_rate": 0.0029345923550953106, "loss": 1.2544, "step": 2035 }, { "epoch": 0.17854045262657417, "grad_norm": 0.0732421875, "learning_rate": 0.0029344669355803386, "loss": 1.3077, "step": 2036 }, { "epoch": 0.1786281444009487, "grad_norm": 0.06494140625, "learning_rate": 0.0029343413989252986, "loss": 1.183, "step": 2037 }, { "epoch": 0.17871583617532327, "grad_norm": 0.059326171875, "learning_rate": 0.0029342157451416376, "loss": 1.3681, "step": 2038 }, { "epoch": 0.1788035279496978, "grad_norm": 0.0712890625, "learning_rate": 0.002934089974240817, "loss": 1.2765, "step": 2039 }, { "epoch": 0.17889121972407235, "grad_norm": 0.07373046875, "learning_rate": 0.0029339640862343047, "loss": 1.2838, "step": 2040 }, { "epoch": 0.17897891149844689, "grad_norm": 0.1064453125, "learning_rate": 0.0029338380811335823, "loss": 1.2489, "step": 2041 }, { "epoch": 0.17906660327282145, "grad_norm": 0.06103515625, "learning_rate": 0.0029337119589501414, "loss": 1.3543, "step": 2042 }, { "epoch": 0.179154295047196, "grad_norm": 0.06982421875, "learning_rate": 0.0029335857196954836, "loss": 1.3905, "step": 2043 }, { "epoch": 0.17924198682157053, "grad_norm": 0.07421875, "learning_rate": 0.002933459363381122, "loss": 1.327, "step": 2044 }, { "epoch": 0.1793296785959451, "grad_norm": 0.0673828125, "learning_rate": 0.0029333328900185795, "loss": 1.2801, "step": 2045 }, { "epoch": 0.17941737037031963, "grad_norm": 0.12890625, "learning_rate": 0.00293320629961939, "loss": 1.3188, "step": 2046 }, { "epoch": 0.17950506214469417, "grad_norm": 0.0810546875, "learning_rate": 0.002933079592195099, "loss": 1.2847, "step": 2047 }, { "epoch": 0.1795927539190687, "grad_norm": 0.10693359375, "learning_rate": 0.0029329527677572614, "loss": 1.2744, "step": 2048 }, { "epoch": 0.17968044569344327, "grad_norm": 0.06201171875, "learning_rate": 0.0029328258263174433, "loss": 1.3479, "step": 2049 }, { "epoch": 0.1797681374678178, "grad_norm": 0.06298828125, "learning_rate": 0.002932698767887221, "loss": 1.4069, "step": 2050 }, { "epoch": 0.17985582924219234, "grad_norm": 0.07177734375, "learning_rate": 0.0029325715924781825, "loss": 1.2809, "step": 2051 }, { "epoch": 0.17994352101656688, "grad_norm": 0.068359375, "learning_rate": 0.002932444300101926, "loss": 1.2276, "step": 2052 }, { "epoch": 0.18003121279094145, "grad_norm": 0.083984375, "learning_rate": 0.0029323168907700596, "loss": 1.3572, "step": 2053 }, { "epoch": 0.18011890456531598, "grad_norm": 0.0771484375, "learning_rate": 0.0029321893644942035, "loss": 1.3139, "step": 2054 }, { "epoch": 0.18020659633969052, "grad_norm": 0.07421875, "learning_rate": 0.0029320617212859872, "loss": 1.254, "step": 2055 }, { "epoch": 0.1802942881140651, "grad_norm": 0.10107421875, "learning_rate": 0.002931933961157051, "loss": 1.2231, "step": 2056 }, { "epoch": 0.18038197988843963, "grad_norm": 0.107421875, "learning_rate": 0.002931806084119048, "loss": 1.261, "step": 2057 }, { "epoch": 0.18046967166281416, "grad_norm": 0.1298828125, "learning_rate": 0.0029316780901836375, "loss": 1.239, "step": 2058 }, { "epoch": 0.1805573634371887, "grad_norm": 0.10205078125, "learning_rate": 0.002931549979362495, "loss": 1.3004, "step": 2059 }, { "epoch": 0.18064505521156327, "grad_norm": 0.068359375, "learning_rate": 0.002931421751667302, "loss": 1.2773, "step": 2060 }, { "epoch": 0.1807327469859378, "grad_norm": 0.0830078125, "learning_rate": 0.002931293407109754, "loss": 1.3422, "step": 2061 }, { "epoch": 0.18082043876031234, "grad_norm": 0.1474609375, "learning_rate": 0.0029311649457015544, "loss": 1.365, "step": 2062 }, { "epoch": 0.1809081305346869, "grad_norm": 0.1591796875, "learning_rate": 0.0029310363674544195, "loss": 1.2521, "step": 2063 }, { "epoch": 0.18099582230906144, "grad_norm": 0.09375, "learning_rate": 0.0029309076723800743, "loss": 1.259, "step": 2064 }, { "epoch": 0.18108351408343598, "grad_norm": 0.154296875, "learning_rate": 0.0029307788604902565, "loss": 1.2831, "step": 2065 }, { "epoch": 0.18117120585781052, "grad_norm": 0.078125, "learning_rate": 0.002930649931796712, "loss": 1.3899, "step": 2066 }, { "epoch": 0.18125889763218508, "grad_norm": 0.16015625, "learning_rate": 0.0029305208863112005, "loss": 1.2355, "step": 2067 }, { "epoch": 0.18134658940655962, "grad_norm": 0.1337890625, "learning_rate": 0.00293039172404549, "loss": 1.2356, "step": 2068 }, { "epoch": 0.18143428118093416, "grad_norm": 0.08984375, "learning_rate": 0.0029302624450113592, "loss": 1.287, "step": 2069 }, { "epoch": 0.1815219729553087, "grad_norm": 0.0810546875, "learning_rate": 0.002930133049220598, "loss": 1.2087, "step": 2070 }, { "epoch": 0.18160966472968326, "grad_norm": 0.10009765625, "learning_rate": 0.002930003536685008, "loss": 1.2763, "step": 2071 }, { "epoch": 0.1816973565040578, "grad_norm": 0.08984375, "learning_rate": 0.0029298739074164, "loss": 1.2378, "step": 2072 }, { "epoch": 0.18178504827843234, "grad_norm": 0.091796875, "learning_rate": 0.002929744161426595, "loss": 1.2721, "step": 2073 }, { "epoch": 0.1818727400528069, "grad_norm": 0.10546875, "learning_rate": 0.002929614298727426, "loss": 1.276, "step": 2074 }, { "epoch": 0.18196043182718144, "grad_norm": 0.08984375, "learning_rate": 0.0029294843193307368, "loss": 1.2939, "step": 2075 }, { "epoch": 0.18204812360155598, "grad_norm": 0.1328125, "learning_rate": 0.0029293542232483806, "loss": 1.3273, "step": 2076 }, { "epoch": 0.18213581537593052, "grad_norm": 0.1396484375, "learning_rate": 0.0029292240104922215, "loss": 1.2891, "step": 2077 }, { "epoch": 0.18222350715030508, "grad_norm": 0.12158203125, "learning_rate": 0.0029290936810741353, "loss": 1.3401, "step": 2078 }, { "epoch": 0.18231119892467962, "grad_norm": 0.263671875, "learning_rate": 0.0029289632350060074, "loss": 1.2913, "step": 2079 }, { "epoch": 0.18239889069905416, "grad_norm": 0.12060546875, "learning_rate": 0.002928832672299734, "loss": 1.2751, "step": 2080 }, { "epoch": 0.1824865824734287, "grad_norm": 0.12451171875, "learning_rate": 0.002928701992967222, "loss": 1.3103, "step": 2081 }, { "epoch": 0.18257427424780326, "grad_norm": 0.09423828125, "learning_rate": 0.0029285711970203888, "loss": 1.2821, "step": 2082 }, { "epoch": 0.1826619660221778, "grad_norm": 0.1025390625, "learning_rate": 0.0029284402844711637, "loss": 1.2696, "step": 2083 }, { "epoch": 0.18274965779655233, "grad_norm": 0.119140625, "learning_rate": 0.0029283092553314847, "loss": 1.3003, "step": 2084 }, { "epoch": 0.1828373495709269, "grad_norm": 0.078125, "learning_rate": 0.0029281781096133015, "loss": 1.2424, "step": 2085 }, { "epoch": 0.18292504134530144, "grad_norm": 0.1083984375, "learning_rate": 0.0029280468473285746, "loss": 1.3091, "step": 2086 }, { "epoch": 0.18301273311967597, "grad_norm": 0.08154296875, "learning_rate": 0.002927915468489274, "loss": 1.2801, "step": 2087 }, { "epoch": 0.1831004248940505, "grad_norm": 0.1083984375, "learning_rate": 0.002927783973107382, "loss": 1.3374, "step": 2088 }, { "epoch": 0.18318811666842508, "grad_norm": 0.06982421875, "learning_rate": 0.0029276523611948906, "loss": 1.2714, "step": 2089 }, { "epoch": 0.18327580844279961, "grad_norm": 0.061767578125, "learning_rate": 0.0029275206327638017, "loss": 1.3353, "step": 2090 }, { "epoch": 0.18336350021717415, "grad_norm": 0.06787109375, "learning_rate": 0.0029273887878261285, "loss": 1.2432, "step": 2091 }, { "epoch": 0.18345119199154872, "grad_norm": 0.07421875, "learning_rate": 0.002927256826393896, "loss": 1.2248, "step": 2092 }, { "epoch": 0.18353888376592326, "grad_norm": 0.10302734375, "learning_rate": 0.0029271247484791377, "loss": 1.2952, "step": 2093 }, { "epoch": 0.1836265755402978, "grad_norm": 0.0556640625, "learning_rate": 0.0029269925540939, "loss": 1.2897, "step": 2094 }, { "epoch": 0.18371426731467233, "grad_norm": 0.06005859375, "learning_rate": 0.002926860243250237, "loss": 1.2682, "step": 2095 }, { "epoch": 0.1838019590890469, "grad_norm": 0.057373046875, "learning_rate": 0.002926727815960216, "loss": 1.3283, "step": 2096 }, { "epoch": 0.18388965086342143, "grad_norm": 0.08984375, "learning_rate": 0.0029265952722359154, "loss": 1.2866, "step": 2097 }, { "epoch": 0.18397734263779597, "grad_norm": 0.06298828125, "learning_rate": 0.0029264626120894203, "loss": 1.2639, "step": 2098 }, { "epoch": 0.1840650344121705, "grad_norm": 0.09716796875, "learning_rate": 0.0029263298355328305, "loss": 1.242, "step": 2099 }, { "epoch": 0.18415272618654507, "grad_norm": 0.0966796875, "learning_rate": 0.0029261969425782544, "loss": 1.273, "step": 2100 }, { "epoch": 0.1842404179609196, "grad_norm": 0.0712890625, "learning_rate": 0.0029260639332378117, "loss": 1.2406, "step": 2101 }, { "epoch": 0.18432810973529415, "grad_norm": 0.08740234375, "learning_rate": 0.002925930807523632, "loss": 1.3257, "step": 2102 }, { "epoch": 0.18441580150966871, "grad_norm": 0.09326171875, "learning_rate": 0.002925797565447857, "loss": 1.2914, "step": 2103 }, { "epoch": 0.18450349328404325, "grad_norm": 0.16796875, "learning_rate": 0.0029256642070226374, "loss": 1.3653, "step": 2104 }, { "epoch": 0.1845911850584178, "grad_norm": 0.1162109375, "learning_rate": 0.002925530732260135, "loss": 1.2269, "step": 2105 }, { "epoch": 0.18467887683279233, "grad_norm": 0.09033203125, "learning_rate": 0.002925397141172523, "loss": 1.2591, "step": 2106 }, { "epoch": 0.1847665686071669, "grad_norm": 0.0927734375, "learning_rate": 0.0029252634337719836, "loss": 1.3032, "step": 2107 }, { "epoch": 0.18485426038154143, "grad_norm": 0.107421875, "learning_rate": 0.0029251296100707117, "loss": 1.3121, "step": 2108 }, { "epoch": 0.18494195215591597, "grad_norm": 0.0732421875, "learning_rate": 0.0029249956700809106, "loss": 1.2635, "step": 2109 }, { "epoch": 0.18502964393029053, "grad_norm": 0.0908203125, "learning_rate": 0.002924861613814796, "loss": 1.2585, "step": 2110 }, { "epoch": 0.18511733570466507, "grad_norm": 0.076171875, "learning_rate": 0.0029247274412845933, "loss": 1.2616, "step": 2111 }, { "epoch": 0.1852050274790396, "grad_norm": 0.10205078125, "learning_rate": 0.0029245931525025384, "loss": 1.3721, "step": 2112 }, { "epoch": 0.18529271925341415, "grad_norm": 0.2138671875, "learning_rate": 0.002924458747480879, "loss": 1.3301, "step": 2113 }, { "epoch": 0.1853804110277887, "grad_norm": 0.240234375, "learning_rate": 0.0029243242262318708, "loss": 1.2551, "step": 2114 }, { "epoch": 0.18546810280216325, "grad_norm": 0.0947265625, "learning_rate": 0.0029241895887677834, "loss": 1.3276, "step": 2115 }, { "epoch": 0.18555579457653779, "grad_norm": 0.1982421875, "learning_rate": 0.002924054835100895, "loss": 1.2977, "step": 2116 }, { "epoch": 0.18564348635091232, "grad_norm": 0.11279296875, "learning_rate": 0.0029239199652434937, "loss": 1.2529, "step": 2117 }, { "epoch": 0.1857311781252869, "grad_norm": 0.0986328125, "learning_rate": 0.0029237849792078815, "loss": 1.2947, "step": 2118 }, { "epoch": 0.18581886989966143, "grad_norm": 0.12255859375, "learning_rate": 0.0029236498770063664, "loss": 1.3339, "step": 2119 }, { "epoch": 0.18590656167403596, "grad_norm": 0.07666015625, "learning_rate": 0.002923514658651271, "loss": 1.3381, "step": 2120 }, { "epoch": 0.18599425344841053, "grad_norm": 0.13671875, "learning_rate": 0.0029233793241549262, "loss": 1.2915, "step": 2121 }, { "epoch": 0.18608194522278507, "grad_norm": 0.0673828125, "learning_rate": 0.0029232438735296742, "loss": 1.2917, "step": 2122 }, { "epoch": 0.1861696369971596, "grad_norm": 0.1318359375, "learning_rate": 0.002923108306787868, "loss": 1.2928, "step": 2123 }, { "epoch": 0.18625732877153414, "grad_norm": 0.07080078125, "learning_rate": 0.00292297262394187, "loss": 1.2238, "step": 2124 }, { "epoch": 0.1863450205459087, "grad_norm": 0.11962890625, "learning_rate": 0.002922836825004056, "loss": 1.2365, "step": 2125 }, { "epoch": 0.18643271232028324, "grad_norm": 0.07861328125, "learning_rate": 0.002922700909986808, "loss": 1.3205, "step": 2126 }, { "epoch": 0.18652040409465778, "grad_norm": 0.181640625, "learning_rate": 0.0029225648789025236, "loss": 1.3367, "step": 2127 }, { "epoch": 0.18660809586903235, "grad_norm": 0.171875, "learning_rate": 0.0029224287317636067, "loss": 1.2601, "step": 2128 }, { "epoch": 0.18669578764340689, "grad_norm": 0.16015625, "learning_rate": 0.002922292468582475, "loss": 1.3007, "step": 2129 }, { "epoch": 0.18678347941778142, "grad_norm": 0.1884765625, "learning_rate": 0.0029221560893715535, "loss": 1.2981, "step": 2130 }, { "epoch": 0.18687117119215596, "grad_norm": 0.150390625, "learning_rate": 0.002922019594143281, "loss": 1.3115, "step": 2131 }, { "epoch": 0.18695886296653053, "grad_norm": 0.2578125, "learning_rate": 0.0029218829829101055, "loss": 1.2917, "step": 2132 }, { "epoch": 0.18704655474090506, "grad_norm": 0.0791015625, "learning_rate": 0.002921746255684485, "loss": 1.2705, "step": 2133 }, { "epoch": 0.1871342465152796, "grad_norm": 0.298828125, "learning_rate": 0.0029216094124788895, "loss": 1.2559, "step": 2134 }, { "epoch": 0.18722193828965414, "grad_norm": 0.1396484375, "learning_rate": 0.0029214724533057983, "loss": 1.2599, "step": 2135 }, { "epoch": 0.1873096300640287, "grad_norm": 0.1396484375, "learning_rate": 0.002921335378177701, "loss": 1.2606, "step": 2136 }, { "epoch": 0.18739732183840324, "grad_norm": 0.1123046875, "learning_rate": 0.002921198187107099, "loss": 1.2416, "step": 2137 }, { "epoch": 0.18748501361277778, "grad_norm": 0.11083984375, "learning_rate": 0.0029210608801065047, "loss": 1.3157, "step": 2138 }, { "epoch": 0.18757270538715234, "grad_norm": 0.130859375, "learning_rate": 0.002920923457188439, "loss": 1.273, "step": 2139 }, { "epoch": 0.18766039716152688, "grad_norm": 0.1298828125, "learning_rate": 0.002920785918365435, "loss": 1.3201, "step": 2140 }, { "epoch": 0.18774808893590142, "grad_norm": 0.15234375, "learning_rate": 0.002920648263650036, "loss": 1.2455, "step": 2141 }, { "epoch": 0.18783578071027596, "grad_norm": 0.205078125, "learning_rate": 0.0029205104930547948, "loss": 1.3268, "step": 2142 }, { "epoch": 0.18792347248465052, "grad_norm": 0.2578125, "learning_rate": 0.002920372606592277, "loss": 1.3074, "step": 2143 }, { "epoch": 0.18801116425902506, "grad_norm": 0.1748046875, "learning_rate": 0.002920234604275057, "loss": 1.2683, "step": 2144 }, { "epoch": 0.1880988560333996, "grad_norm": 0.2001953125, "learning_rate": 0.00292009648611572, "loss": 1.3148, "step": 2145 }, { "epoch": 0.18818654780777416, "grad_norm": 0.2216796875, "learning_rate": 0.0029199582521268618, "loss": 1.288, "step": 2146 }, { "epoch": 0.1882742395821487, "grad_norm": 0.2275390625, "learning_rate": 0.00291981990232109, "loss": 1.2693, "step": 2147 }, { "epoch": 0.18836193135652324, "grad_norm": 0.1943359375, "learning_rate": 0.002919681436711021, "loss": 1.3157, "step": 2148 }, { "epoch": 0.18844962313089778, "grad_norm": 0.24609375, "learning_rate": 0.0029195428553092824, "loss": 1.2774, "step": 2149 }, { "epoch": 0.18853731490527234, "grad_norm": 0.1396484375, "learning_rate": 0.002919404158128513, "loss": 1.2633, "step": 2150 }, { "epoch": 0.18862500667964688, "grad_norm": 0.1591796875, "learning_rate": 0.0029192653451813604, "loss": 1.2643, "step": 2151 }, { "epoch": 0.18871269845402142, "grad_norm": 0.185546875, "learning_rate": 0.002919126416480486, "loss": 1.2647, "step": 2152 }, { "epoch": 0.18880039022839595, "grad_norm": 0.1376953125, "learning_rate": 0.002918987372038558, "loss": 1.3584, "step": 2153 }, { "epoch": 0.18888808200277052, "grad_norm": 0.1337890625, "learning_rate": 0.002918848211868258, "loss": 1.2712, "step": 2154 }, { "epoch": 0.18897577377714506, "grad_norm": 0.0849609375, "learning_rate": 0.0029187089359822764, "loss": 1.3379, "step": 2155 }, { "epoch": 0.1890634655515196, "grad_norm": 0.087890625, "learning_rate": 0.002918569544393315, "loss": 1.2874, "step": 2156 }, { "epoch": 0.18915115732589416, "grad_norm": 0.06884765625, "learning_rate": 0.002918430037114086, "loss": 1.3174, "step": 2157 }, { "epoch": 0.1892388491002687, "grad_norm": 0.09521484375, "learning_rate": 0.002918290414157312, "loss": 1.2853, "step": 2158 }, { "epoch": 0.18932654087464323, "grad_norm": 0.07763671875, "learning_rate": 0.0029181506755357263, "loss": 1.288, "step": 2159 }, { "epoch": 0.18941423264901777, "grad_norm": 0.09326171875, "learning_rate": 0.0029180108212620726, "loss": 1.2608, "step": 2160 }, { "epoch": 0.18950192442339234, "grad_norm": 0.08203125, "learning_rate": 0.0029178708513491053, "loss": 1.2865, "step": 2161 }, { "epoch": 0.18958961619776687, "grad_norm": 0.10546875, "learning_rate": 0.0029177307658095897, "loss": 1.3253, "step": 2162 }, { "epoch": 0.1896773079721414, "grad_norm": 0.0859375, "learning_rate": 0.0029175905646563007, "loss": 1.2937, "step": 2163 }, { "epoch": 0.18976499974651598, "grad_norm": 0.0908203125, "learning_rate": 0.0029174502479020247, "loss": 1.2722, "step": 2164 }, { "epoch": 0.18985269152089052, "grad_norm": 0.0634765625, "learning_rate": 0.002917309815559558, "loss": 1.3557, "step": 2165 }, { "epoch": 0.18994038329526505, "grad_norm": 0.07958984375, "learning_rate": 0.0029171692676417072, "loss": 1.265, "step": 2166 }, { "epoch": 0.1900280750696396, "grad_norm": 0.059814453125, "learning_rate": 0.0029170286041612908, "loss": 1.255, "step": 2167 }, { "epoch": 0.19011576684401416, "grad_norm": 0.06884765625, "learning_rate": 0.002916887825131136, "loss": 1.3117, "step": 2168 }, { "epoch": 0.1902034586183887, "grad_norm": 0.10302734375, "learning_rate": 0.0029167469305640824, "loss": 1.3273, "step": 2169 }, { "epoch": 0.19029115039276323, "grad_norm": 0.10400390625, "learning_rate": 0.0029166059204729787, "loss": 1.2779, "step": 2170 }, { "epoch": 0.19037884216713777, "grad_norm": 0.07568359375, "learning_rate": 0.002916464794870685, "loss": 1.2741, "step": 2171 }, { "epoch": 0.19046653394151233, "grad_norm": 0.0966796875, "learning_rate": 0.002916323553770071, "loss": 1.3513, "step": 2172 }, { "epoch": 0.19055422571588687, "grad_norm": 0.1337890625, "learning_rate": 0.0029161821971840185, "loss": 1.3046, "step": 2173 }, { "epoch": 0.1906419174902614, "grad_norm": 0.07275390625, "learning_rate": 0.0029160407251254176, "loss": 1.2788, "step": 2174 }, { "epoch": 0.19072960926463597, "grad_norm": 0.0791015625, "learning_rate": 0.0029158991376071713, "loss": 1.2294, "step": 2175 }, { "epoch": 0.1908173010390105, "grad_norm": 0.0849609375, "learning_rate": 0.0029157574346421918, "loss": 1.2857, "step": 2176 }, { "epoch": 0.19090499281338505, "grad_norm": 0.07421875, "learning_rate": 0.0029156156162434012, "loss": 1.2705, "step": 2177 }, { "epoch": 0.1909926845877596, "grad_norm": 0.076171875, "learning_rate": 0.0029154736824237334, "loss": 1.3194, "step": 2178 }, { "epoch": 0.19108037636213415, "grad_norm": 0.07177734375, "learning_rate": 0.002915331633196133, "loss": 1.2467, "step": 2179 }, { "epoch": 0.1911680681365087, "grad_norm": 0.0703125, "learning_rate": 0.002915189468573554, "loss": 1.2335, "step": 2180 }, { "epoch": 0.19125575991088323, "grad_norm": 0.06689453125, "learning_rate": 0.0029150471885689613, "loss": 1.2648, "step": 2181 }, { "epoch": 0.1913434516852578, "grad_norm": 0.109375, "learning_rate": 0.002914904793195331, "loss": 1.2717, "step": 2182 }, { "epoch": 0.19143114345963233, "grad_norm": 0.08544921875, "learning_rate": 0.0029147622824656485, "loss": 1.3032, "step": 2183 }, { "epoch": 0.19151883523400687, "grad_norm": 0.11669921875, "learning_rate": 0.0029146196563929116, "loss": 1.2755, "step": 2184 }, { "epoch": 0.1916065270083814, "grad_norm": 0.1708984375, "learning_rate": 0.002914476914990126, "loss": 1.2718, "step": 2185 }, { "epoch": 0.19169421878275597, "grad_norm": 0.076171875, "learning_rate": 0.0029143340582703098, "loss": 1.2618, "step": 2186 }, { "epoch": 0.1917819105571305, "grad_norm": 0.142578125, "learning_rate": 0.0029141910862464917, "loss": 1.3013, "step": 2187 }, { "epoch": 0.19186960233150505, "grad_norm": 0.08349609375, "learning_rate": 0.0029140479989317103, "loss": 1.2856, "step": 2188 }, { "epoch": 0.19195729410587958, "grad_norm": 0.2138671875, "learning_rate": 0.002913904796339014, "loss": 1.2676, "step": 2189 }, { "epoch": 0.19204498588025415, "grad_norm": 0.09912109375, "learning_rate": 0.002913761478481463, "loss": 1.2774, "step": 2190 }, { "epoch": 0.1921326776546287, "grad_norm": 0.1474609375, "learning_rate": 0.002913618045372128, "loss": 1.3363, "step": 2191 }, { "epoch": 0.19222036942900322, "grad_norm": 0.1083984375, "learning_rate": 0.0029134744970240895, "loss": 1.3231, "step": 2192 }, { "epoch": 0.1923080612033778, "grad_norm": 0.10595703125, "learning_rate": 0.002913330833450438, "loss": 1.2895, "step": 2193 }, { "epoch": 0.19239575297775233, "grad_norm": 0.09765625, "learning_rate": 0.002913187054664276, "loss": 1.3274, "step": 2194 }, { "epoch": 0.19248344475212686, "grad_norm": 0.0654296875, "learning_rate": 0.0029130431606787153, "loss": 1.2597, "step": 2195 }, { "epoch": 0.1925711365265014, "grad_norm": 0.0771484375, "learning_rate": 0.0029128991515068787, "loss": 1.2553, "step": 2196 }, { "epoch": 0.19265882830087597, "grad_norm": 0.09814453125, "learning_rate": 0.0029127550271619003, "loss": 1.3077, "step": 2197 }, { "epoch": 0.1927465200752505, "grad_norm": 0.1279296875, "learning_rate": 0.0029126107876569223, "loss": 1.3561, "step": 2198 }, { "epoch": 0.19283421184962504, "grad_norm": 0.2470703125, "learning_rate": 0.002912466433005101, "loss": 1.2611, "step": 2199 }, { "epoch": 0.1929219036239996, "grad_norm": 0.080078125, "learning_rate": 0.0029123219632195985, "loss": 1.3284, "step": 2200 }, { "epoch": 0.19300959539837415, "grad_norm": 0.2392578125, "learning_rate": 0.002912177378313593, "loss": 1.267, "step": 2201 }, { "epoch": 0.19309728717274868, "grad_norm": 0.126953125, "learning_rate": 0.002912032678300268, "loss": 1.2827, "step": 2202 }, { "epoch": 0.19318497894712322, "grad_norm": 0.177734375, "learning_rate": 0.0029118878631928216, "loss": 1.2881, "step": 2203 }, { "epoch": 0.19327267072149779, "grad_norm": 0.1025390625, "learning_rate": 0.002911742933004459, "loss": 1.3347, "step": 2204 }, { "epoch": 0.19336036249587232, "grad_norm": 0.080078125, "learning_rate": 0.0029115978877483984, "loss": 1.2332, "step": 2205 }, { "epoch": 0.19344805427024686, "grad_norm": 0.06494140625, "learning_rate": 0.0029114527274378666, "loss": 1.2406, "step": 2206 }, { "epoch": 0.1935357460446214, "grad_norm": 0.05859375, "learning_rate": 0.0029113074520861035, "loss": 1.2749, "step": 2207 }, { "epoch": 0.19362343781899596, "grad_norm": 0.072265625, "learning_rate": 0.002911162061706356, "loss": 1.2389, "step": 2208 }, { "epoch": 0.1937111295933705, "grad_norm": 0.07080078125, "learning_rate": 0.0029110165563118844, "loss": 1.3312, "step": 2209 }, { "epoch": 0.19379882136774504, "grad_norm": 0.07421875, "learning_rate": 0.002910870935915958, "loss": 1.302, "step": 2210 }, { "epoch": 0.1938865131421196, "grad_norm": 0.07421875, "learning_rate": 0.0029107252005318575, "loss": 1.3257, "step": 2211 }, { "epoch": 0.19397420491649414, "grad_norm": 0.10302734375, "learning_rate": 0.002910579350172873, "loss": 1.2743, "step": 2212 }, { "epoch": 0.19406189669086868, "grad_norm": 0.072265625, "learning_rate": 0.002910433384852306, "loss": 1.2715, "step": 2213 }, { "epoch": 0.19414958846524322, "grad_norm": 0.0732421875, "learning_rate": 0.0029102873045834684, "loss": 1.2599, "step": 2214 }, { "epoch": 0.19423728023961778, "grad_norm": 0.06494140625, "learning_rate": 0.0029101411093796813, "loss": 1.2276, "step": 2215 }, { "epoch": 0.19432497201399232, "grad_norm": 0.07080078125, "learning_rate": 0.0029099947992542786, "loss": 1.2645, "step": 2216 }, { "epoch": 0.19441266378836686, "grad_norm": 0.07177734375, "learning_rate": 0.002909848374220603, "loss": 1.2392, "step": 2217 }, { "epoch": 0.19450035556274142, "grad_norm": 0.07177734375, "learning_rate": 0.0029097018342920077, "loss": 1.2703, "step": 2218 }, { "epoch": 0.19458804733711596, "grad_norm": 0.0673828125, "learning_rate": 0.0029095551794818568, "loss": 1.2913, "step": 2219 }, { "epoch": 0.1946757391114905, "grad_norm": 0.07080078125, "learning_rate": 0.002909408409803525, "loss": 1.2418, "step": 2220 }, { "epoch": 0.19476343088586504, "grad_norm": 0.07177734375, "learning_rate": 0.0029092615252703977, "loss": 1.3006, "step": 2221 }, { "epoch": 0.1948511226602396, "grad_norm": 0.0751953125, "learning_rate": 0.0029091145258958702, "loss": 1.3464, "step": 2222 }, { "epoch": 0.19493881443461414, "grad_norm": 0.08154296875, "learning_rate": 0.0029089674116933484, "loss": 1.2621, "step": 2223 }, { "epoch": 0.19502650620898868, "grad_norm": 0.06640625, "learning_rate": 0.002908820182676248, "loss": 1.2753, "step": 2224 }, { "epoch": 0.1951141979833632, "grad_norm": 0.11376953125, "learning_rate": 0.002908672838857997, "loss": 1.371, "step": 2225 }, { "epoch": 0.19520188975773778, "grad_norm": 0.1083984375, "learning_rate": 0.0029085253802520325, "loss": 1.2986, "step": 2226 }, { "epoch": 0.19528958153211232, "grad_norm": 0.06201171875, "learning_rate": 0.0029083778068718018, "loss": 1.2561, "step": 2227 }, { "epoch": 0.19537727330648685, "grad_norm": 0.1591796875, "learning_rate": 0.002908230118730764, "loss": 1.4475, "step": 2228 }, { "epoch": 0.19546496508086142, "grad_norm": 0.15625, "learning_rate": 0.002908082315842387, "loss": 1.2581, "step": 2229 }, { "epoch": 0.19555265685523596, "grad_norm": 0.1123046875, "learning_rate": 0.0029079343982201513, "loss": 1.2987, "step": 2230 }, { "epoch": 0.1956403486296105, "grad_norm": 0.26953125, "learning_rate": 0.0029077863658775447, "loss": 1.2585, "step": 2231 }, { "epoch": 0.19572804040398503, "grad_norm": 0.07568359375, "learning_rate": 0.0029076382188280695, "loss": 1.2266, "step": 2232 }, { "epoch": 0.1958157321783596, "grad_norm": 0.2392578125, "learning_rate": 0.002907489957085235, "loss": 1.3035, "step": 2233 }, { "epoch": 0.19590342395273413, "grad_norm": 0.0771484375, "learning_rate": 0.0029073415806625622, "loss": 1.2862, "step": 2234 }, { "epoch": 0.19599111572710867, "grad_norm": 0.1298828125, "learning_rate": 0.0029071930895735835, "loss": 1.256, "step": 2235 }, { "epoch": 0.19607880750148324, "grad_norm": 0.0927734375, "learning_rate": 0.0029070444838318406, "loss": 1.2602, "step": 2236 }, { "epoch": 0.19616649927585778, "grad_norm": 0.078125, "learning_rate": 0.0029068957634508855, "loss": 1.2328, "step": 2237 }, { "epoch": 0.1962541910502323, "grad_norm": 0.140625, "learning_rate": 0.002906746928444281, "loss": 1.2648, "step": 2238 }, { "epoch": 0.19634188282460685, "grad_norm": 0.08349609375, "learning_rate": 0.0029065979788256016, "loss": 1.3105, "step": 2239 }, { "epoch": 0.19642957459898142, "grad_norm": 0.11474609375, "learning_rate": 0.0029064489146084305, "loss": 1.3335, "step": 2240 }, { "epoch": 0.19651726637335595, "grad_norm": 0.0947265625, "learning_rate": 0.0029062997358063614, "loss": 1.243, "step": 2241 }, { "epoch": 0.1966049581477305, "grad_norm": 0.06689453125, "learning_rate": 0.0029061504424329997, "loss": 1.2862, "step": 2242 }, { "epoch": 0.19669264992210503, "grad_norm": 0.10595703125, "learning_rate": 0.0029060010345019604, "loss": 1.273, "step": 2243 }, { "epoch": 0.1967803416964796, "grad_norm": 0.068359375, "learning_rate": 0.0029058515120268693, "loss": 1.2379, "step": 2244 }, { "epoch": 0.19686803347085413, "grad_norm": 0.10791015625, "learning_rate": 0.0029057018750213623, "loss": 1.2659, "step": 2245 }, { "epoch": 0.19695572524522867, "grad_norm": 0.0771484375, "learning_rate": 0.0029055521234990856, "loss": 1.323, "step": 2246 }, { "epoch": 0.19704341701960323, "grad_norm": 0.06640625, "learning_rate": 0.0029054022574736965, "loss": 1.2854, "step": 2247 }, { "epoch": 0.19713110879397777, "grad_norm": 0.0927734375, "learning_rate": 0.0029052522769588618, "loss": 1.3263, "step": 2248 }, { "epoch": 0.1972188005683523, "grad_norm": 0.0888671875, "learning_rate": 0.002905102181968261, "loss": 1.215, "step": 2249 }, { "epoch": 0.19730649234272685, "grad_norm": 0.06494140625, "learning_rate": 0.0029049519725155805, "loss": 1.2983, "step": 2250 }, { "epoch": 0.1973941841171014, "grad_norm": 0.0849609375, "learning_rate": 0.00290480164861452, "loss": 1.2753, "step": 2251 }, { "epoch": 0.19748187589147595, "grad_norm": 0.08935546875, "learning_rate": 0.0029046512102787877, "loss": 1.2986, "step": 2252 }, { "epoch": 0.1975695676658505, "grad_norm": 0.140625, "learning_rate": 0.002904500657522105, "loss": 1.3322, "step": 2253 }, { "epoch": 0.19765725944022505, "grad_norm": 0.10888671875, "learning_rate": 0.0029043499903582, "loss": 1.3033, "step": 2254 }, { "epoch": 0.1977449512145996, "grad_norm": 0.0947265625, "learning_rate": 0.002904199208800814, "loss": 1.2517, "step": 2255 }, { "epoch": 0.19783264298897413, "grad_norm": 0.0986328125, "learning_rate": 0.0029040483128636983, "loss": 1.2724, "step": 2256 }, { "epoch": 0.19792033476334867, "grad_norm": 0.0908203125, "learning_rate": 0.0029038973025606136, "loss": 1.3009, "step": 2257 }, { "epoch": 0.19800802653772323, "grad_norm": 0.111328125, "learning_rate": 0.0029037461779053315, "loss": 1.1683, "step": 2258 }, { "epoch": 0.19809571831209777, "grad_norm": 0.09228515625, "learning_rate": 0.002903594938911635, "loss": 1.3031, "step": 2259 }, { "epoch": 0.1981834100864723, "grad_norm": 0.12255859375, "learning_rate": 0.0029034435855933164, "loss": 1.2716, "step": 2260 }, { "epoch": 0.19827110186084684, "grad_norm": 0.1005859375, "learning_rate": 0.0029032921179641775, "loss": 1.2845, "step": 2261 }, { "epoch": 0.1983587936352214, "grad_norm": 0.0869140625, "learning_rate": 0.002903140536038034, "loss": 1.2969, "step": 2262 }, { "epoch": 0.19844648540959595, "grad_norm": 0.1240234375, "learning_rate": 0.002902988839828708, "loss": 1.3202, "step": 2263 }, { "epoch": 0.19853417718397048, "grad_norm": 0.0986328125, "learning_rate": 0.002902837029350034, "loss": 1.2763, "step": 2264 }, { "epoch": 0.19862186895834505, "grad_norm": 0.1435546875, "learning_rate": 0.0029026851046158575, "loss": 1.261, "step": 2265 }, { "epoch": 0.1987095607327196, "grad_norm": 0.25, "learning_rate": 0.0029025330656400334, "loss": 1.3433, "step": 2266 }, { "epoch": 0.19879725250709412, "grad_norm": 0.0751953125, "learning_rate": 0.002902380912436427, "loss": 1.255, "step": 2267 }, { "epoch": 0.19888494428146866, "grad_norm": 0.212890625, "learning_rate": 0.0029022286450189143, "loss": 1.3035, "step": 2268 }, { "epoch": 0.19897263605584323, "grad_norm": 0.08349609375, "learning_rate": 0.002902076263401382, "loss": 1.3344, "step": 2269 }, { "epoch": 0.19906032783021776, "grad_norm": 0.140625, "learning_rate": 0.002901923767597726, "loss": 1.2781, "step": 2270 }, { "epoch": 0.1991480196045923, "grad_norm": 0.11376953125, "learning_rate": 0.0029017711576218553, "loss": 1.2766, "step": 2271 }, { "epoch": 0.19923571137896687, "grad_norm": 0.08642578125, "learning_rate": 0.0029016184334876858, "loss": 1.329, "step": 2272 }, { "epoch": 0.1993234031533414, "grad_norm": 0.1533203125, "learning_rate": 0.0029014655952091465, "loss": 1.3013, "step": 2273 }, { "epoch": 0.19941109492771594, "grad_norm": 0.1591796875, "learning_rate": 0.0029013126428001756, "loss": 1.3014, "step": 2274 }, { "epoch": 0.19949878670209048, "grad_norm": 0.07373046875, "learning_rate": 0.002901159576274721, "loss": 1.267, "step": 2275 }, { "epoch": 0.19958647847646505, "grad_norm": 0.1259765625, "learning_rate": 0.002901006395646744, "loss": 1.2836, "step": 2276 }, { "epoch": 0.19967417025083958, "grad_norm": 0.06787109375, "learning_rate": 0.0029008531009302127, "loss": 1.2586, "step": 2277 }, { "epoch": 0.19976186202521412, "grad_norm": 0.12158203125, "learning_rate": 0.0029006996921391074, "loss": 1.2357, "step": 2278 }, { "epoch": 0.19984955379958866, "grad_norm": 0.07177734375, "learning_rate": 0.0029005461692874195, "loss": 1.2212, "step": 2279 }, { "epoch": 0.19993724557396322, "grad_norm": 0.1298828125, "learning_rate": 0.0029003925323891487, "loss": 1.3103, "step": 2280 }, { "epoch": 0.20002493734833776, "grad_norm": 0.1611328125, "learning_rate": 0.002900238781458307, "loss": 1.2774, "step": 2281 }, { "epoch": 0.2001126291227123, "grad_norm": 0.08203125, "learning_rate": 0.002900084916508916, "loss": 1.2849, "step": 2282 }, { "epoch": 0.20020032089708686, "grad_norm": 0.15234375, "learning_rate": 0.002899930937555008, "loss": 1.1996, "step": 2283 }, { "epoch": 0.2002880126714614, "grad_norm": 0.08984375, "learning_rate": 0.0028997768446106247, "loss": 1.2579, "step": 2284 }, { "epoch": 0.20037570444583594, "grad_norm": 0.1875, "learning_rate": 0.002899622637689819, "loss": 1.3244, "step": 2285 }, { "epoch": 0.20046339622021048, "grad_norm": 0.068359375, "learning_rate": 0.0028994683168066557, "loss": 1.2868, "step": 2286 }, { "epoch": 0.20055108799458504, "grad_norm": 0.1533203125, "learning_rate": 0.0028993138819752066, "loss": 1.3437, "step": 2287 }, { "epoch": 0.20063877976895958, "grad_norm": 0.083984375, "learning_rate": 0.002899159333209557, "loss": 1.3052, "step": 2288 }, { "epoch": 0.20072647154333412, "grad_norm": 0.18359375, "learning_rate": 0.0028990046705238003, "loss": 1.3225, "step": 2289 }, { "epoch": 0.20081416331770865, "grad_norm": 0.076171875, "learning_rate": 0.0028988498939320428, "loss": 1.2942, "step": 2290 }, { "epoch": 0.20090185509208322, "grad_norm": 0.1591796875, "learning_rate": 0.002898695003448398, "loss": 1.2647, "step": 2291 }, { "epoch": 0.20098954686645776, "grad_norm": 0.06298828125, "learning_rate": 0.0028985399990869927, "loss": 1.301, "step": 2292 }, { "epoch": 0.2010772386408323, "grad_norm": 0.1650390625, "learning_rate": 0.002898384880861963, "loss": 1.2491, "step": 2293 }, { "epoch": 0.20116493041520686, "grad_norm": 0.0703125, "learning_rate": 0.0028982296487874543, "loss": 1.3032, "step": 2294 }, { "epoch": 0.2012526221895814, "grad_norm": 0.11376953125, "learning_rate": 0.0028980743028776245, "loss": 1.2988, "step": 2295 }, { "epoch": 0.20134031396395594, "grad_norm": 0.0751953125, "learning_rate": 0.0028979188431466395, "loss": 1.3103, "step": 2296 }, { "epoch": 0.20142800573833047, "grad_norm": 0.0732421875, "learning_rate": 0.002897763269608678, "loss": 1.2971, "step": 2297 }, { "epoch": 0.20151569751270504, "grad_norm": 0.08203125, "learning_rate": 0.0028976075822779272, "loss": 1.2683, "step": 2298 }, { "epoch": 0.20160338928707958, "grad_norm": 0.0859375, "learning_rate": 0.0028974517811685852, "loss": 1.2985, "step": 2299 }, { "epoch": 0.2016910810614541, "grad_norm": 0.0810546875, "learning_rate": 0.002897295866294862, "loss": 1.2858, "step": 2300 }, { "epoch": 0.20177877283582868, "grad_norm": 0.09716796875, "learning_rate": 0.0028971398376709746, "loss": 1.3451, "step": 2301 }, { "epoch": 0.20186646461020322, "grad_norm": 0.06591796875, "learning_rate": 0.002896983695311154, "loss": 1.2416, "step": 2302 }, { "epoch": 0.20195415638457775, "grad_norm": 0.08544921875, "learning_rate": 0.002896827439229639, "loss": 1.2343, "step": 2303 }, { "epoch": 0.2020418481589523, "grad_norm": 0.0693359375, "learning_rate": 0.002896671069440681, "loss": 1.2479, "step": 2304 }, { "epoch": 0.20212953993332686, "grad_norm": 0.1435546875, "learning_rate": 0.0028965145859585393, "loss": 1.2542, "step": 2305 }, { "epoch": 0.2022172317077014, "grad_norm": 0.1640625, "learning_rate": 0.0028963579887974853, "loss": 1.3069, "step": 2306 }, { "epoch": 0.20230492348207593, "grad_norm": 0.0703125, "learning_rate": 0.0028962012779718002, "loss": 1.2393, "step": 2307 }, { "epoch": 0.20239261525645047, "grad_norm": 0.1298828125, "learning_rate": 0.0028960444534957756, "loss": 1.1868, "step": 2308 }, { "epoch": 0.20248030703082504, "grad_norm": 0.09228515625, "learning_rate": 0.0028958875153837133, "loss": 1.3513, "step": 2309 }, { "epoch": 0.20256799880519957, "grad_norm": 0.0830078125, "learning_rate": 0.002895730463649926, "loss": 1.2728, "step": 2310 }, { "epoch": 0.2026556905795741, "grad_norm": 0.08349609375, "learning_rate": 0.002895573298308736, "loss": 1.255, "step": 2311 }, { "epoch": 0.20274338235394868, "grad_norm": 0.08154296875, "learning_rate": 0.0028954160193744773, "loss": 1.2665, "step": 2312 }, { "epoch": 0.2028310741283232, "grad_norm": 0.1533203125, "learning_rate": 0.002895258626861492, "loss": 1.3099, "step": 2313 }, { "epoch": 0.20291876590269775, "grad_norm": 0.07421875, "learning_rate": 0.0028951011207841346, "loss": 1.2123, "step": 2314 }, { "epoch": 0.2030064576770723, "grad_norm": 0.06689453125, "learning_rate": 0.00289494350115677, "loss": 1.2963, "step": 2315 }, { "epoch": 0.20309414945144685, "grad_norm": 0.138671875, "learning_rate": 0.002894785767993771, "loss": 1.2644, "step": 2316 }, { "epoch": 0.2031818412258214, "grad_norm": 0.0771484375, "learning_rate": 0.0028946279213095233, "loss": 1.2715, "step": 2317 }, { "epoch": 0.20326953300019593, "grad_norm": 0.1015625, "learning_rate": 0.0028944699611184225, "loss": 1.3062, "step": 2318 }, { "epoch": 0.2033572247745705, "grad_norm": 0.1826171875, "learning_rate": 0.002894311887434874, "loss": 1.2904, "step": 2319 }, { "epoch": 0.20344491654894503, "grad_norm": 0.1884765625, "learning_rate": 0.0028941537002732933, "loss": 1.2718, "step": 2320 }, { "epoch": 0.20353260832331957, "grad_norm": 0.142578125, "learning_rate": 0.002893995399648107, "loss": 1.2735, "step": 2321 }, { "epoch": 0.2036203000976941, "grad_norm": 0.203125, "learning_rate": 0.0028938369855737514, "loss": 1.2833, "step": 2322 }, { "epoch": 0.20370799187206867, "grad_norm": 0.09326171875, "learning_rate": 0.002893678458064674, "loss": 1.27, "step": 2323 }, { "epoch": 0.2037956836464432, "grad_norm": 0.1708984375, "learning_rate": 0.002893519817135332, "loss": 1.2796, "step": 2324 }, { "epoch": 0.20388337542081775, "grad_norm": 0.07421875, "learning_rate": 0.0028933610628001922, "loss": 1.3464, "step": 2325 }, { "epoch": 0.20397106719519228, "grad_norm": 0.08837890625, "learning_rate": 0.0028932021950737333, "loss": 1.242, "step": 2326 }, { "epoch": 0.20405875896956685, "grad_norm": 0.087890625, "learning_rate": 0.0028930432139704435, "loss": 1.2869, "step": 2327 }, { "epoch": 0.2041464507439414, "grad_norm": 0.078125, "learning_rate": 0.0028928841195048215, "loss": 1.2469, "step": 2328 }, { "epoch": 0.20423414251831593, "grad_norm": 0.064453125, "learning_rate": 0.0028927249116913766, "loss": 1.2399, "step": 2329 }, { "epoch": 0.2043218342926905, "grad_norm": 0.0791015625, "learning_rate": 0.0028925655905446277, "loss": 1.2274, "step": 2330 }, { "epoch": 0.20440952606706503, "grad_norm": 0.0673828125, "learning_rate": 0.002892406156079104, "loss": 1.2703, "step": 2331 }, { "epoch": 0.20449721784143957, "grad_norm": 0.099609375, "learning_rate": 0.002892246608309347, "loss": 1.2229, "step": 2332 }, { "epoch": 0.2045849096158141, "grad_norm": 0.06884765625, "learning_rate": 0.0028920869472499056, "loss": 1.2837, "step": 2333 }, { "epoch": 0.20467260139018867, "grad_norm": 0.18359375, "learning_rate": 0.0028919271729153416, "loss": 1.2874, "step": 2334 }, { "epoch": 0.2047602931645632, "grad_norm": 0.1474609375, "learning_rate": 0.002891767285320225, "loss": 1.2437, "step": 2335 }, { "epoch": 0.20484798493893774, "grad_norm": 0.09423828125, "learning_rate": 0.0028916072844791378, "loss": 1.3894, "step": 2336 }, { "epoch": 0.2049356767133123, "grad_norm": 0.15234375, "learning_rate": 0.002891447170406671, "loss": 1.259, "step": 2337 }, { "epoch": 0.20502336848768685, "grad_norm": 0.10595703125, "learning_rate": 0.0028912869431174278, "loss": 1.3135, "step": 2338 }, { "epoch": 0.20511106026206138, "grad_norm": 0.140625, "learning_rate": 0.002891126602626019, "loss": 1.2941, "step": 2339 }, { "epoch": 0.20519875203643592, "grad_norm": 0.0947265625, "learning_rate": 0.0028909661489470682, "loss": 1.3052, "step": 2340 }, { "epoch": 0.2052864438108105, "grad_norm": 0.169921875, "learning_rate": 0.002890805582095209, "loss": 1.2315, "step": 2341 }, { "epoch": 0.20537413558518502, "grad_norm": 0.0771484375, "learning_rate": 0.002890644902085083, "loss": 1.2922, "step": 2342 }, { "epoch": 0.20546182735955956, "grad_norm": 0.154296875, "learning_rate": 0.0028904841089313444, "loss": 1.2749, "step": 2343 }, { "epoch": 0.2055495191339341, "grad_norm": 0.06787109375, "learning_rate": 0.0028903232026486576, "loss": 1.2851, "step": 2344 }, { "epoch": 0.20563721090830867, "grad_norm": 0.095703125, "learning_rate": 0.0028901621832516967, "loss": 1.2768, "step": 2345 }, { "epoch": 0.2057249026826832, "grad_norm": 0.08544921875, "learning_rate": 0.0028900010507551463, "loss": 1.3379, "step": 2346 }, { "epoch": 0.20581259445705774, "grad_norm": 0.076171875, "learning_rate": 0.002889839805173701, "loss": 1.3307, "step": 2347 }, { "epoch": 0.2059002862314323, "grad_norm": 0.08154296875, "learning_rate": 0.002889678446522066, "loss": 1.2209, "step": 2348 }, { "epoch": 0.20598797800580684, "grad_norm": 0.10107421875, "learning_rate": 0.002889516974814957, "loss": 1.2045, "step": 2349 }, { "epoch": 0.20607566978018138, "grad_norm": 0.09619140625, "learning_rate": 0.0028893553900670994, "loss": 1.2713, "step": 2350 }, { "epoch": 0.20616336155455592, "grad_norm": 0.1591796875, "learning_rate": 0.00288919369229323, "loss": 1.3164, "step": 2351 }, { "epoch": 0.20625105332893048, "grad_norm": 0.1904296875, "learning_rate": 0.0028890318815080946, "loss": 1.3303, "step": 2352 }, { "epoch": 0.20633874510330502, "grad_norm": 0.0908203125, "learning_rate": 0.0028888699577264503, "loss": 1.3291, "step": 2353 }, { "epoch": 0.20642643687767956, "grad_norm": 0.1796875, "learning_rate": 0.002888707920963064, "loss": 1.2635, "step": 2354 }, { "epoch": 0.20651412865205412, "grad_norm": 0.07275390625, "learning_rate": 0.0028885457712327133, "loss": 1.2906, "step": 2355 }, { "epoch": 0.20660182042642866, "grad_norm": 0.248046875, "learning_rate": 0.002888383508550185, "loss": 1.2984, "step": 2356 }, { "epoch": 0.2066895122008032, "grad_norm": 0.08056640625, "learning_rate": 0.0028882211329302773, "loss": 1.2881, "step": 2357 }, { "epoch": 0.20677720397517774, "grad_norm": 0.2080078125, "learning_rate": 0.0028880586443878, "loss": 1.2774, "step": 2358 }, { "epoch": 0.2068648957495523, "grad_norm": 0.06494140625, "learning_rate": 0.002887896042937569, "loss": 1.288, "step": 2359 }, { "epoch": 0.20695258752392684, "grad_norm": 0.2021484375, "learning_rate": 0.0028877333285944155, "loss": 1.3116, "step": 2360 }, { "epoch": 0.20704027929830138, "grad_norm": 0.09814453125, "learning_rate": 0.0028875705013731764, "loss": 1.1944, "step": 2361 }, { "epoch": 0.20712797107267591, "grad_norm": 0.09619140625, "learning_rate": 0.0028874075612887037, "loss": 1.2565, "step": 2362 }, { "epoch": 0.20721566284705048, "grad_norm": 0.08349609375, "learning_rate": 0.002887244508355855, "loss": 1.2087, "step": 2363 }, { "epoch": 0.20730335462142502, "grad_norm": 0.1005859375, "learning_rate": 0.0028870813425895016, "loss": 1.2417, "step": 2364 }, { "epoch": 0.20739104639579956, "grad_norm": 0.08056640625, "learning_rate": 0.002886918064004523, "loss": 1.2932, "step": 2365 }, { "epoch": 0.20747873817017412, "grad_norm": 0.0908203125, "learning_rate": 0.0028867546726158095, "loss": 1.3138, "step": 2366 }, { "epoch": 0.20756642994454866, "grad_norm": 0.10400390625, "learning_rate": 0.0028865911684382633, "loss": 1.2595, "step": 2367 }, { "epoch": 0.2076541217189232, "grad_norm": 0.08203125, "learning_rate": 0.0028864275514867946, "loss": 1.288, "step": 2368 }, { "epoch": 0.20774181349329773, "grad_norm": 0.1494140625, "learning_rate": 0.0028862638217763245, "loss": 1.332, "step": 2369 }, { "epoch": 0.2078295052676723, "grad_norm": 0.07421875, "learning_rate": 0.0028860999793217856, "loss": 1.254, "step": 2370 }, { "epoch": 0.20791719704204684, "grad_norm": 0.07861328125, "learning_rate": 0.00288593602413812, "loss": 1.262, "step": 2371 }, { "epoch": 0.20800488881642137, "grad_norm": 0.0859375, "learning_rate": 0.0028857719562402793, "loss": 1.3785, "step": 2372 }, { "epoch": 0.20809258059079594, "grad_norm": 0.07470703125, "learning_rate": 0.002885607775643226, "loss": 1.2854, "step": 2373 }, { "epoch": 0.20818027236517048, "grad_norm": 0.087890625, "learning_rate": 0.0028854434823619335, "loss": 1.2861, "step": 2374 }, { "epoch": 0.20826796413954501, "grad_norm": 0.0859375, "learning_rate": 0.0028852790764113847, "loss": 1.2589, "step": 2375 }, { "epoch": 0.20835565591391955, "grad_norm": 0.1337890625, "learning_rate": 0.002885114557806573, "loss": 1.2286, "step": 2376 }, { "epoch": 0.20844334768829412, "grad_norm": 0.0751953125, "learning_rate": 0.002884949926562502, "loss": 1.2787, "step": 2377 }, { "epoch": 0.20853103946266865, "grad_norm": 0.115234375, "learning_rate": 0.0028847851826941853, "loss": 1.2505, "step": 2378 }, { "epoch": 0.2086187312370432, "grad_norm": 0.07666015625, "learning_rate": 0.0028846203262166484, "loss": 1.2991, "step": 2379 }, { "epoch": 0.20870642301141773, "grad_norm": 0.12353515625, "learning_rate": 0.0028844553571449245, "loss": 1.2878, "step": 2380 }, { "epoch": 0.2087941147857923, "grad_norm": 0.10400390625, "learning_rate": 0.002884290275494059, "loss": 1.3179, "step": 2381 }, { "epoch": 0.20888180656016683, "grad_norm": 0.0732421875, "learning_rate": 0.002884125081279106, "loss": 1.2493, "step": 2382 }, { "epoch": 0.20896949833454137, "grad_norm": 0.078125, "learning_rate": 0.002883959774515132, "loss": 1.2542, "step": 2383 }, { "epoch": 0.20905719010891594, "grad_norm": 0.072265625, "learning_rate": 0.0028837943552172114, "loss": 1.3115, "step": 2384 }, { "epoch": 0.20914488188329047, "grad_norm": 0.08349609375, "learning_rate": 0.002883628823400431, "loss": 1.238, "step": 2385 }, { "epoch": 0.209232573657665, "grad_norm": 0.12109375, "learning_rate": 0.002883463179079887, "loss": 1.2661, "step": 2386 }, { "epoch": 0.20932026543203955, "grad_norm": 0.09033203125, "learning_rate": 0.0028832974222706845, "loss": 1.2511, "step": 2387 }, { "epoch": 0.2094079572064141, "grad_norm": 0.0927734375, "learning_rate": 0.002883131552987941, "loss": 1.2618, "step": 2388 }, { "epoch": 0.20949564898078865, "grad_norm": 0.059326171875, "learning_rate": 0.0028829655712467837, "loss": 1.2583, "step": 2389 }, { "epoch": 0.2095833407551632, "grad_norm": 0.0888671875, "learning_rate": 0.002882799477062349, "loss": 1.2627, "step": 2390 }, { "epoch": 0.20967103252953775, "grad_norm": 0.1005859375, "learning_rate": 0.002882633270449784, "loss": 1.2647, "step": 2391 }, { "epoch": 0.2097587243039123, "grad_norm": 0.150390625, "learning_rate": 0.002882466951424247, "loss": 1.2564, "step": 2392 }, { "epoch": 0.20984641607828683, "grad_norm": 0.09521484375, "learning_rate": 0.0028823005200009056, "loss": 1.2006, "step": 2393 }, { "epoch": 0.20993410785266137, "grad_norm": 0.1767578125, "learning_rate": 0.002882133976194938, "loss": 1.2203, "step": 2394 }, { "epoch": 0.21002179962703593, "grad_norm": 0.08935546875, "learning_rate": 0.0028819673200215327, "loss": 1.3026, "step": 2395 }, { "epoch": 0.21010949140141047, "grad_norm": 0.140625, "learning_rate": 0.002881800551495888, "loss": 1.2158, "step": 2396 }, { "epoch": 0.210197183175785, "grad_norm": 0.126953125, "learning_rate": 0.002881633670633213, "loss": 1.2533, "step": 2397 }, { "epoch": 0.21028487495015954, "grad_norm": 0.07080078125, "learning_rate": 0.002881466677448726, "loss": 1.3119, "step": 2398 }, { "epoch": 0.2103725667245341, "grad_norm": 0.162109375, "learning_rate": 0.0028812995719576574, "loss": 1.2855, "step": 2399 }, { "epoch": 0.21046025849890865, "grad_norm": 0.0986328125, "learning_rate": 0.0028811323541752466, "loss": 1.3705, "step": 2400 }, { "epoch": 0.21054795027328319, "grad_norm": 0.10400390625, "learning_rate": 0.0028809650241167434, "loss": 1.2604, "step": 2401 }, { "epoch": 0.21063564204765775, "grad_norm": 0.07763671875, "learning_rate": 0.0028807975817974076, "loss": 1.2635, "step": 2402 }, { "epoch": 0.2107233338220323, "grad_norm": 0.1044921875, "learning_rate": 0.002880630027232509, "loss": 1.2807, "step": 2403 }, { "epoch": 0.21081102559640683, "grad_norm": 0.07568359375, "learning_rate": 0.0028804623604373296, "loss": 1.2784, "step": 2404 }, { "epoch": 0.21089871737078136, "grad_norm": 0.10986328125, "learning_rate": 0.0028802945814271593, "loss": 1.284, "step": 2405 }, { "epoch": 0.21098640914515593, "grad_norm": 0.087890625, "learning_rate": 0.002880126690217299, "loss": 1.2666, "step": 2406 }, { "epoch": 0.21107410091953047, "grad_norm": 0.09716796875, "learning_rate": 0.00287995868682306, "loss": 1.2065, "step": 2407 }, { "epoch": 0.211161792693905, "grad_norm": 0.07470703125, "learning_rate": 0.0028797905712597636, "loss": 1.2137, "step": 2408 }, { "epoch": 0.21124948446827957, "grad_norm": 0.1025390625, "learning_rate": 0.0028796223435427425, "loss": 1.2932, "step": 2409 }, { "epoch": 0.2113371762426541, "grad_norm": 0.20703125, "learning_rate": 0.002879454003687337, "loss": 1.2784, "step": 2410 }, { "epoch": 0.21142486801702864, "grad_norm": 0.10400390625, "learning_rate": 0.0028792855517089013, "loss": 1.2087, "step": 2411 }, { "epoch": 0.21151255979140318, "grad_norm": 0.134765625, "learning_rate": 0.0028791169876227958, "loss": 1.3072, "step": 2412 }, { "epoch": 0.21160025156577775, "grad_norm": 0.103515625, "learning_rate": 0.0028789483114443942, "loss": 1.2263, "step": 2413 }, { "epoch": 0.21168794334015228, "grad_norm": 0.1494140625, "learning_rate": 0.002878779523189079, "loss": 1.297, "step": 2414 }, { "epoch": 0.21177563511452682, "grad_norm": 0.1455078125, "learning_rate": 0.0028786106228722436, "loss": 1.2567, "step": 2415 }, { "epoch": 0.21186332688890136, "grad_norm": 0.1435546875, "learning_rate": 0.002878441610509291, "loss": 1.3334, "step": 2416 }, { "epoch": 0.21195101866327593, "grad_norm": 0.1875, "learning_rate": 0.002878272486115635, "loss": 1.2781, "step": 2417 }, { "epoch": 0.21203871043765046, "grad_norm": 0.068359375, "learning_rate": 0.002878103249706699, "loss": 1.2451, "step": 2418 }, { "epoch": 0.212126402212025, "grad_norm": 0.125, "learning_rate": 0.0028779339012979166, "loss": 1.2909, "step": 2419 }, { "epoch": 0.21221409398639957, "grad_norm": 0.059814453125, "learning_rate": 0.002877764440904732, "loss": 1.2476, "step": 2420 }, { "epoch": 0.2123017857607741, "grad_norm": 0.14453125, "learning_rate": 0.0028775948685426, "loss": 1.2885, "step": 2421 }, { "epoch": 0.21238947753514864, "grad_norm": 0.08984375, "learning_rate": 0.0028774251842269855, "loss": 1.2551, "step": 2422 }, { "epoch": 0.21247716930952318, "grad_norm": 0.12890625, "learning_rate": 0.002877255387973362, "loss": 1.2865, "step": 2423 }, { "epoch": 0.21256486108389774, "grad_norm": 0.1005859375, "learning_rate": 0.0028770854797972164, "loss": 1.2609, "step": 2424 }, { "epoch": 0.21265255285827228, "grad_norm": 0.12109375, "learning_rate": 0.002876915459714042, "loss": 1.2348, "step": 2425 }, { "epoch": 0.21274024463264682, "grad_norm": 0.09619140625, "learning_rate": 0.0028767453277393448, "loss": 1.2782, "step": 2426 }, { "epoch": 0.21282793640702138, "grad_norm": 0.07470703125, "learning_rate": 0.0028765750838886414, "loss": 1.2089, "step": 2427 }, { "epoch": 0.21291562818139592, "grad_norm": 0.11767578125, "learning_rate": 0.0028764047281774558, "loss": 1.2045, "step": 2428 }, { "epoch": 0.21300331995577046, "grad_norm": 0.0634765625, "learning_rate": 0.0028762342606213254, "loss": 1.332, "step": 2429 }, { "epoch": 0.213091011730145, "grad_norm": 0.125, "learning_rate": 0.0028760636812357955, "loss": 1.3101, "step": 2430 }, { "epoch": 0.21317870350451956, "grad_norm": 0.06591796875, "learning_rate": 0.0028758929900364236, "loss": 1.2876, "step": 2431 }, { "epoch": 0.2132663952788941, "grad_norm": 0.119140625, "learning_rate": 0.002875722187038775, "loss": 1.2818, "step": 2432 }, { "epoch": 0.21335408705326864, "grad_norm": 0.1201171875, "learning_rate": 0.0028755512722584277, "loss": 1.3046, "step": 2433 }, { "epoch": 0.21344177882764317, "grad_norm": 0.232421875, "learning_rate": 0.002875380245710968, "loss": 1.3125, "step": 2434 }, { "epoch": 0.21352947060201774, "grad_norm": 0.061767578125, "learning_rate": 0.002875209107411993, "loss": 1.3793, "step": 2435 }, { "epoch": 0.21361716237639228, "grad_norm": 0.1953125, "learning_rate": 0.0028750378573771103, "loss": 1.2415, "step": 2436 }, { "epoch": 0.21370485415076682, "grad_norm": 0.087890625, "learning_rate": 0.0028748664956219374, "loss": 1.2343, "step": 2437 }, { "epoch": 0.21379254592514138, "grad_norm": 0.09716796875, "learning_rate": 0.002874695022162102, "loss": 1.2555, "step": 2438 }, { "epoch": 0.21388023769951592, "grad_norm": 0.0625, "learning_rate": 0.0028745234370132424, "loss": 1.2617, "step": 2439 }, { "epoch": 0.21396792947389046, "grad_norm": 0.1015625, "learning_rate": 0.0028743517401910064, "loss": 1.275, "step": 2440 }, { "epoch": 0.214055621248265, "grad_norm": 0.08642578125, "learning_rate": 0.0028741799317110518, "loss": 1.2851, "step": 2441 }, { "epoch": 0.21414331302263956, "grad_norm": 0.10107421875, "learning_rate": 0.002874008011589048, "loss": 1.3522, "step": 2442 }, { "epoch": 0.2142310047970141, "grad_norm": 0.16796875, "learning_rate": 0.0028738359798406734, "loss": 1.2744, "step": 2443 }, { "epoch": 0.21431869657138863, "grad_norm": 0.11083984375, "learning_rate": 0.0028736638364816164, "loss": 1.2942, "step": 2444 }, { "epoch": 0.2144063883457632, "grad_norm": 0.119140625, "learning_rate": 0.0028734915815275767, "loss": 1.2563, "step": 2445 }, { "epoch": 0.21449408012013774, "grad_norm": 0.12890625, "learning_rate": 0.0028733192149942633, "loss": 1.2987, "step": 2446 }, { "epoch": 0.21458177189451227, "grad_norm": 0.08251953125, "learning_rate": 0.0028731467368973956, "loss": 1.3395, "step": 2447 }, { "epoch": 0.2146694636688868, "grad_norm": 0.07861328125, "learning_rate": 0.0028729741472527026, "loss": 1.2985, "step": 2448 }, { "epoch": 0.21475715544326138, "grad_norm": 0.1025390625, "learning_rate": 0.0028728014460759244, "loss": 1.2833, "step": 2449 }, { "epoch": 0.21484484721763591, "grad_norm": 0.07861328125, "learning_rate": 0.0028726286333828114, "loss": 1.2714, "step": 2450 }, { "epoch": 0.21493253899201045, "grad_norm": 0.095703125, "learning_rate": 0.002872455709189123, "loss": 1.2647, "step": 2451 }, { "epoch": 0.215020230766385, "grad_norm": 0.068359375, "learning_rate": 0.00287228267351063, "loss": 1.3379, "step": 2452 }, { "epoch": 0.21510792254075956, "grad_norm": 0.1298828125, "learning_rate": 0.002872109526363112, "loss": 1.2657, "step": 2453 }, { "epoch": 0.2151956143151341, "grad_norm": 0.1396484375, "learning_rate": 0.0028719362677623603, "loss": 1.2238, "step": 2454 }, { "epoch": 0.21528330608950863, "grad_norm": 0.111328125, "learning_rate": 0.0028717628977241753, "loss": 1.2976, "step": 2455 }, { "epoch": 0.2153709978638832, "grad_norm": 0.07666015625, "learning_rate": 0.0028715894162643675, "loss": 1.2645, "step": 2456 }, { "epoch": 0.21545868963825773, "grad_norm": 0.09619140625, "learning_rate": 0.0028714158233987593, "loss": 1.3058, "step": 2457 }, { "epoch": 0.21554638141263227, "grad_norm": 0.07568359375, "learning_rate": 0.002871242119143181, "loss": 1.2316, "step": 2458 }, { "epoch": 0.2156340731870068, "grad_norm": 0.11376953125, "learning_rate": 0.002871068303513474, "loss": 1.3226, "step": 2459 }, { "epoch": 0.21572176496138137, "grad_norm": 0.138671875, "learning_rate": 0.0028708943765254893, "loss": 1.358, "step": 2460 }, { "epoch": 0.2158094567357559, "grad_norm": 0.09521484375, "learning_rate": 0.0028707203381950893, "loss": 1.3389, "step": 2461 }, { "epoch": 0.21589714851013045, "grad_norm": 0.123046875, "learning_rate": 0.0028705461885381463, "loss": 1.265, "step": 2462 }, { "epoch": 0.21598484028450501, "grad_norm": 0.08740234375, "learning_rate": 0.0028703719275705412, "loss": 1.2276, "step": 2463 }, { "epoch": 0.21607253205887955, "grad_norm": 0.1396484375, "learning_rate": 0.002870197555308167, "loss": 1.2937, "step": 2464 }, { "epoch": 0.2161602238332541, "grad_norm": 0.076171875, "learning_rate": 0.0028700230717669255, "loss": 1.2537, "step": 2465 }, { "epoch": 0.21624791560762863, "grad_norm": 0.09912109375, "learning_rate": 0.0028698484769627297, "loss": 1.2535, "step": 2466 }, { "epoch": 0.2163356073820032, "grad_norm": 0.08349609375, "learning_rate": 0.0028696737709115014, "loss": 1.2793, "step": 2467 }, { "epoch": 0.21642329915637773, "grad_norm": 0.1396484375, "learning_rate": 0.002869498953629174, "loss": 1.2962, "step": 2468 }, { "epoch": 0.21651099093075227, "grad_norm": 0.197265625, "learning_rate": 0.00286932402513169, "loss": 1.3411, "step": 2469 }, { "epoch": 0.2165986827051268, "grad_norm": 0.0673828125, "learning_rate": 0.002869148985435003, "loss": 1.242, "step": 2470 }, { "epoch": 0.21668637447950137, "grad_norm": 0.1337890625, "learning_rate": 0.002868973834555075, "loss": 1.2965, "step": 2471 }, { "epoch": 0.2167740662538759, "grad_norm": 0.07568359375, "learning_rate": 0.0028687985725078806, "loss": 1.2913, "step": 2472 }, { "epoch": 0.21686175802825045, "grad_norm": 0.1201171875, "learning_rate": 0.0028686231993094026, "loss": 1.2165, "step": 2473 }, { "epoch": 0.216949449802625, "grad_norm": 0.0615234375, "learning_rate": 0.0028684477149756344, "loss": 1.2703, "step": 2474 }, { "epoch": 0.21703714157699955, "grad_norm": 0.125, "learning_rate": 0.0028682721195225805, "loss": 1.2611, "step": 2475 }, { "epoch": 0.21712483335137409, "grad_norm": 0.0947265625, "learning_rate": 0.0028680964129662537, "loss": 1.2425, "step": 2476 }, { "epoch": 0.21721252512574862, "grad_norm": 0.126953125, "learning_rate": 0.002867920595322679, "loss": 1.1872, "step": 2477 }, { "epoch": 0.2173002169001232, "grad_norm": 0.09130859375, "learning_rate": 0.00286774466660789, "loss": 1.236, "step": 2478 }, { "epoch": 0.21738790867449773, "grad_norm": 0.1279296875, "learning_rate": 0.0028675686268379305, "loss": 1.2327, "step": 2479 }, { "epoch": 0.21747560044887226, "grad_norm": 0.06396484375, "learning_rate": 0.002867392476028856, "loss": 1.2727, "step": 2480 }, { "epoch": 0.21756329222324683, "grad_norm": 0.1328125, "learning_rate": 0.00286721621419673, "loss": 1.2478, "step": 2481 }, { "epoch": 0.21765098399762137, "grad_norm": 0.07958984375, "learning_rate": 0.0028670398413576277, "loss": 1.2714, "step": 2482 }, { "epoch": 0.2177386757719959, "grad_norm": 0.0810546875, "learning_rate": 0.0028668633575276332, "loss": 1.2996, "step": 2483 }, { "epoch": 0.21782636754637044, "grad_norm": 0.07763671875, "learning_rate": 0.002866686762722842, "loss": 1.2523, "step": 2484 }, { "epoch": 0.217914059320745, "grad_norm": 0.07080078125, "learning_rate": 0.0028665100569593596, "loss": 1.3229, "step": 2485 }, { "epoch": 0.21800175109511954, "grad_norm": 0.08251953125, "learning_rate": 0.0028663332402533, "loss": 1.2549, "step": 2486 }, { "epoch": 0.21808944286949408, "grad_norm": 0.154296875, "learning_rate": 0.0028661563126207885, "loss": 1.3275, "step": 2487 }, { "epoch": 0.21817713464386862, "grad_norm": 0.0703125, "learning_rate": 0.0028659792740779607, "loss": 1.2913, "step": 2488 }, { "epoch": 0.21826482641824319, "grad_norm": 0.1474609375, "learning_rate": 0.0028658021246409627, "loss": 1.2852, "step": 2489 }, { "epoch": 0.21835251819261772, "grad_norm": 0.158203125, "learning_rate": 0.002865624864325949, "loss": 1.3334, "step": 2490 }, { "epoch": 0.21844020996699226, "grad_norm": 0.08203125, "learning_rate": 0.0028654474931490864, "loss": 1.2414, "step": 2491 }, { "epoch": 0.21852790174136683, "grad_norm": 0.140625, "learning_rate": 0.0028652700111265494, "loss": 1.2649, "step": 2492 }, { "epoch": 0.21861559351574136, "grad_norm": 0.078125, "learning_rate": 0.002865092418274525, "loss": 1.2948, "step": 2493 }, { "epoch": 0.2187032852901159, "grad_norm": 0.06787109375, "learning_rate": 0.0028649147146092087, "loss": 1.3084, "step": 2494 }, { "epoch": 0.21879097706449044, "grad_norm": 0.07568359375, "learning_rate": 0.0028647369001468066, "loss": 1.3305, "step": 2495 }, { "epoch": 0.218878668838865, "grad_norm": 0.083984375, "learning_rate": 0.002864558974903535, "loss": 1.2551, "step": 2496 }, { "epoch": 0.21896636061323954, "grad_norm": 0.095703125, "learning_rate": 0.0028643809388956202, "loss": 1.3135, "step": 2497 }, { "epoch": 0.21905405238761408, "grad_norm": 0.06787109375, "learning_rate": 0.0028642027921392996, "loss": 1.2749, "step": 2498 }, { "epoch": 0.21914174416198862, "grad_norm": 0.06103515625, "learning_rate": 0.0028640245346508177, "loss": 1.3496, "step": 2499 }, { "epoch": 0.21922943593636318, "grad_norm": 0.0947265625, "learning_rate": 0.0028638461664464328, "loss": 1.2953, "step": 2500 }, { "epoch": 0.21922943593636318, "eval_loss": 1.2839072942733765, "eval_runtime": 429.3461, "eval_samples_per_second": 33.649, "eval_steps_per_second": 8.413, "step": 2500 }, { "epoch": 0.21931712771073772, "grad_norm": 0.059326171875, "learning_rate": 0.002863667687542411, "loss": 1.2773, "step": 2501 }, { "epoch": 0.21940481948511226, "grad_norm": 0.1552734375, "learning_rate": 0.0028634890979550285, "loss": 1.2774, "step": 2502 }, { "epoch": 0.21949251125948682, "grad_norm": 0.107421875, "learning_rate": 0.002863310397700574, "loss": 1.192, "step": 2503 }, { "epoch": 0.21958020303386136, "grad_norm": 0.10205078125, "learning_rate": 0.0028631315867953424, "loss": 1.2296, "step": 2504 }, { "epoch": 0.2196678948082359, "grad_norm": 0.0712890625, "learning_rate": 0.0028629526652556424, "loss": 1.2763, "step": 2505 }, { "epoch": 0.21975558658261043, "grad_norm": 0.064453125, "learning_rate": 0.0028627736330977902, "loss": 1.2555, "step": 2506 }, { "epoch": 0.219843278356985, "grad_norm": 0.0888671875, "learning_rate": 0.0028625944903381134, "loss": 1.2349, "step": 2507 }, { "epoch": 0.21993097013135954, "grad_norm": 0.12109375, "learning_rate": 0.0028624152369929492, "loss": 1.3289, "step": 2508 }, { "epoch": 0.22001866190573408, "grad_norm": 0.07373046875, "learning_rate": 0.0028622358730786457, "loss": 1.2195, "step": 2509 }, { "epoch": 0.22010635368010864, "grad_norm": 0.12060546875, "learning_rate": 0.00286205639861156, "loss": 1.2496, "step": 2510 }, { "epoch": 0.22019404545448318, "grad_norm": 0.08203125, "learning_rate": 0.0028618768136080596, "loss": 1.2809, "step": 2511 }, { "epoch": 0.22028173722885772, "grad_norm": 0.0546875, "learning_rate": 0.0028616971180845216, "loss": 1.2965, "step": 2512 }, { "epoch": 0.22036942900323225, "grad_norm": 0.0703125, "learning_rate": 0.002861517312057335, "loss": 1.2842, "step": 2513 }, { "epoch": 0.22045712077760682, "grad_norm": 0.09814453125, "learning_rate": 0.0028613373955428964, "loss": 1.2929, "step": 2514 }, { "epoch": 0.22054481255198136, "grad_norm": 0.09130859375, "learning_rate": 0.002861157368557615, "loss": 1.254, "step": 2515 }, { "epoch": 0.2206325043263559, "grad_norm": 0.12255859375, "learning_rate": 0.0028609772311179085, "loss": 1.2807, "step": 2516 }, { "epoch": 0.22072019610073043, "grad_norm": 0.06103515625, "learning_rate": 0.002860796983240204, "loss": 1.2207, "step": 2517 }, { "epoch": 0.220807887875105, "grad_norm": 0.109375, "learning_rate": 0.0028606166249409404, "loss": 1.2588, "step": 2518 }, { "epoch": 0.22089557964947953, "grad_norm": 0.0654296875, "learning_rate": 0.002860436156236566, "loss": 1.2205, "step": 2519 }, { "epoch": 0.22098327142385407, "grad_norm": 0.1376953125, "learning_rate": 0.0028602555771435386, "loss": 1.2833, "step": 2520 }, { "epoch": 0.22107096319822864, "grad_norm": 0.0810546875, "learning_rate": 0.002860074887678327, "loss": 1.3351, "step": 2521 }, { "epoch": 0.22115865497260317, "grad_norm": 0.07763671875, "learning_rate": 0.0028598940878574097, "loss": 1.2278, "step": 2522 }, { "epoch": 0.2212463467469777, "grad_norm": 0.06982421875, "learning_rate": 0.0028597131776972743, "loss": 1.2737, "step": 2523 }, { "epoch": 0.22133403852135225, "grad_norm": 0.0791015625, "learning_rate": 0.0028595321572144204, "loss": 1.333, "step": 2524 }, { "epoch": 0.22142173029572682, "grad_norm": 0.0986328125, "learning_rate": 0.002859351026425356, "loss": 1.244, "step": 2525 }, { "epoch": 0.22150942207010135, "grad_norm": 0.072265625, "learning_rate": 0.0028591697853466, "loss": 1.2507, "step": 2526 }, { "epoch": 0.2215971138444759, "grad_norm": 0.1015625, "learning_rate": 0.002858988433994681, "loss": 1.2778, "step": 2527 }, { "epoch": 0.22168480561885046, "grad_norm": 0.10302734375, "learning_rate": 0.002858806972386138, "loss": 1.2435, "step": 2528 }, { "epoch": 0.221772497393225, "grad_norm": 0.1513671875, "learning_rate": 0.0028586254005375194, "loss": 1.2606, "step": 2529 }, { "epoch": 0.22186018916759953, "grad_norm": 0.07666015625, "learning_rate": 0.0028584437184653844, "loss": 1.3003, "step": 2530 }, { "epoch": 0.22194788094197407, "grad_norm": 0.1328125, "learning_rate": 0.0028582619261863017, "loss": 1.2145, "step": 2531 }, { "epoch": 0.22203557271634863, "grad_norm": 0.0791015625, "learning_rate": 0.0028580800237168504, "loss": 1.3083, "step": 2532 }, { "epoch": 0.22212326449072317, "grad_norm": 0.09765625, "learning_rate": 0.0028578980110736197, "loss": 1.2138, "step": 2533 }, { "epoch": 0.2222109562650977, "grad_norm": 0.083984375, "learning_rate": 0.0028577158882732087, "loss": 1.2795, "step": 2534 }, { "epoch": 0.22229864803947225, "grad_norm": 0.11328125, "learning_rate": 0.0028575336553322266, "loss": 1.2793, "step": 2535 }, { "epoch": 0.2223863398138468, "grad_norm": 0.078125, "learning_rate": 0.002857351312267292, "loss": 1.3062, "step": 2536 }, { "epoch": 0.22247403158822135, "grad_norm": 0.0791015625, "learning_rate": 0.0028571688590950345, "loss": 1.3316, "step": 2537 }, { "epoch": 0.2225617233625959, "grad_norm": 0.10205078125, "learning_rate": 0.0028569862958320934, "loss": 1.2118, "step": 2538 }, { "epoch": 0.22264941513697045, "grad_norm": 0.2138671875, "learning_rate": 0.0028568036224951177, "loss": 1.2736, "step": 2539 }, { "epoch": 0.222737106911345, "grad_norm": 0.07177734375, "learning_rate": 0.0028566208391007674, "loss": 1.3367, "step": 2540 }, { "epoch": 0.22282479868571953, "grad_norm": 0.07421875, "learning_rate": 0.002856437945665711, "loss": 1.2636, "step": 2541 }, { "epoch": 0.22291249046009406, "grad_norm": 0.06787109375, "learning_rate": 0.002856254942206629, "loss": 1.2908, "step": 2542 }, { "epoch": 0.22300018223446863, "grad_norm": 0.060791015625, "learning_rate": 0.0028560718287402097, "loss": 1.2373, "step": 2543 }, { "epoch": 0.22308787400884317, "grad_norm": 0.111328125, "learning_rate": 0.002855888605283153, "loss": 1.3339, "step": 2544 }, { "epoch": 0.2231755657832177, "grad_norm": 0.10009765625, "learning_rate": 0.002855705271852169, "loss": 1.23, "step": 2545 }, { "epoch": 0.22326325755759227, "grad_norm": 0.138671875, "learning_rate": 0.002855521828463976, "loss": 1.2622, "step": 2546 }, { "epoch": 0.2233509493319668, "grad_norm": 0.189453125, "learning_rate": 0.002855338275135305, "loss": 1.2435, "step": 2547 }, { "epoch": 0.22343864110634135, "grad_norm": 0.06982421875, "learning_rate": 0.002855154611882894, "loss": 1.3042, "step": 2548 }, { "epoch": 0.22352633288071588, "grad_norm": 0.11474609375, "learning_rate": 0.0028549708387234944, "loss": 1.3221, "step": 2549 }, { "epoch": 0.22361402465509045, "grad_norm": 0.1064453125, "learning_rate": 0.0028547869556738645, "loss": 1.3045, "step": 2550 }, { "epoch": 0.223701716429465, "grad_norm": 0.056640625, "learning_rate": 0.0028546029627507744, "loss": 1.3078, "step": 2551 }, { "epoch": 0.22378940820383952, "grad_norm": 0.1357421875, "learning_rate": 0.0028544188599710038, "loss": 1.2371, "step": 2552 }, { "epoch": 0.22387709997821406, "grad_norm": 0.10888671875, "learning_rate": 0.0028542346473513424, "loss": 1.2666, "step": 2553 }, { "epoch": 0.22396479175258863, "grad_norm": 0.07177734375, "learning_rate": 0.00285405032490859, "loss": 1.2028, "step": 2554 }, { "epoch": 0.22405248352696316, "grad_norm": 0.08056640625, "learning_rate": 0.0028538658926595558, "loss": 1.2008, "step": 2555 }, { "epoch": 0.2241401753013377, "grad_norm": 0.07470703125, "learning_rate": 0.0028536813506210602, "loss": 1.2686, "step": 2556 }, { "epoch": 0.22422786707571227, "grad_norm": 0.06591796875, "learning_rate": 0.0028534966988099327, "loss": 1.2622, "step": 2557 }, { "epoch": 0.2243155588500868, "grad_norm": 0.0703125, "learning_rate": 0.002853311937243013, "loss": 1.2612, "step": 2558 }, { "epoch": 0.22440325062446134, "grad_norm": 0.0693359375, "learning_rate": 0.002853127065937151, "loss": 1.3077, "step": 2559 }, { "epoch": 0.22449094239883588, "grad_norm": 0.06591796875, "learning_rate": 0.0028529420849092066, "loss": 1.2583, "step": 2560 }, { "epoch": 0.22457863417321045, "grad_norm": 0.09521484375, "learning_rate": 0.0028527569941760493, "loss": 1.2189, "step": 2561 }, { "epoch": 0.22466632594758498, "grad_norm": 0.08935546875, "learning_rate": 0.002852571793754559, "loss": 1.2586, "step": 2562 }, { "epoch": 0.22475401772195952, "grad_norm": 0.10400390625, "learning_rate": 0.0028523864836616257, "loss": 1.257, "step": 2563 }, { "epoch": 0.22484170949633409, "grad_norm": 0.10302734375, "learning_rate": 0.002852201063914149, "loss": 1.2459, "step": 2564 }, { "epoch": 0.22492940127070862, "grad_norm": 0.10400390625, "learning_rate": 0.002852015534529039, "loss": 1.2208, "step": 2565 }, { "epoch": 0.22501709304508316, "grad_norm": 0.16015625, "learning_rate": 0.0028518298955232148, "loss": 1.3184, "step": 2566 }, { "epoch": 0.2251047848194577, "grad_norm": 0.06298828125, "learning_rate": 0.0028516441469136067, "loss": 1.2948, "step": 2567 }, { "epoch": 0.22519247659383226, "grad_norm": 0.18359375, "learning_rate": 0.002851458288717154, "loss": 1.2686, "step": 2568 }, { "epoch": 0.2252801683682068, "grad_norm": 0.169921875, "learning_rate": 0.0028512723209508078, "loss": 1.2685, "step": 2569 }, { "epoch": 0.22536786014258134, "grad_norm": 0.0771484375, "learning_rate": 0.002851086243631527, "loss": 1.3103, "step": 2570 }, { "epoch": 0.22545555191695588, "grad_norm": 0.2421875, "learning_rate": 0.0028509000567762807, "loss": 1.2878, "step": 2571 }, { "epoch": 0.22554324369133044, "grad_norm": 0.13671875, "learning_rate": 0.00285071376040205, "loss": 1.2649, "step": 2572 }, { "epoch": 0.22563093546570498, "grad_norm": 0.1826171875, "learning_rate": 0.002850527354525823, "loss": 1.2265, "step": 2573 }, { "epoch": 0.22571862724007952, "grad_norm": 0.11328125, "learning_rate": 0.002850340839164601, "loss": 1.1954, "step": 2574 }, { "epoch": 0.22580631901445408, "grad_norm": 0.1884765625, "learning_rate": 0.002850154214335393, "loss": 1.2376, "step": 2575 }, { "epoch": 0.22589401078882862, "grad_norm": 0.1279296875, "learning_rate": 0.002849967480055219, "loss": 1.205, "step": 2576 }, { "epoch": 0.22598170256320316, "grad_norm": 0.0888671875, "learning_rate": 0.0028497806363411084, "loss": 1.2764, "step": 2577 }, { "epoch": 0.2260693943375777, "grad_norm": 0.061279296875, "learning_rate": 0.002849593683210101, "loss": 1.2639, "step": 2578 }, { "epoch": 0.22615708611195226, "grad_norm": 0.111328125, "learning_rate": 0.0028494066206792464, "loss": 1.3039, "step": 2579 }, { "epoch": 0.2262447778863268, "grad_norm": 0.06884765625, "learning_rate": 0.0028492194487656046, "loss": 1.3262, "step": 2580 }, { "epoch": 0.22633246966070134, "grad_norm": 0.076171875, "learning_rate": 0.0028490321674862445, "loss": 1.2642, "step": 2581 }, { "epoch": 0.2264201614350759, "grad_norm": 0.0751953125, "learning_rate": 0.0028488447768582463, "loss": 1.2583, "step": 2582 }, { "epoch": 0.22650785320945044, "grad_norm": 0.06494140625, "learning_rate": 0.0028486572768986985, "loss": 1.3174, "step": 2583 }, { "epoch": 0.22659554498382498, "grad_norm": 0.0888671875, "learning_rate": 0.0028484696676247022, "loss": 1.2816, "step": 2584 }, { "epoch": 0.2266832367581995, "grad_norm": 0.12890625, "learning_rate": 0.0028482819490533657, "loss": 1.3061, "step": 2585 }, { "epoch": 0.22677092853257408, "grad_norm": 0.0888671875, "learning_rate": 0.0028480941212018084, "loss": 1.2702, "step": 2586 }, { "epoch": 0.22685862030694862, "grad_norm": 0.1162109375, "learning_rate": 0.0028479061840871608, "loss": 1.2847, "step": 2587 }, { "epoch": 0.22694631208132315, "grad_norm": 0.0849609375, "learning_rate": 0.0028477181377265614, "loss": 1.3363, "step": 2588 }, { "epoch": 0.2270340038556977, "grad_norm": 0.08251953125, "learning_rate": 0.00284752998213716, "loss": 1.2976, "step": 2589 }, { "epoch": 0.22712169563007226, "grad_norm": 0.10693359375, "learning_rate": 0.002847341717336115, "loss": 1.3303, "step": 2590 }, { "epoch": 0.2272093874044468, "grad_norm": 0.0869140625, "learning_rate": 0.0028471533433405967, "loss": 1.2098, "step": 2591 }, { "epoch": 0.22729707917882133, "grad_norm": 0.173828125, "learning_rate": 0.002846964860167784, "loss": 1.2765, "step": 2592 }, { "epoch": 0.2273847709531959, "grad_norm": 0.07080078125, "learning_rate": 0.0028467762678348655, "loss": 1.2392, "step": 2593 }, { "epoch": 0.22747246272757043, "grad_norm": 0.1083984375, "learning_rate": 0.002846587566359041, "loss": 1.2401, "step": 2594 }, { "epoch": 0.22756015450194497, "grad_norm": 0.1083984375, "learning_rate": 0.0028463987557575195, "loss": 1.2599, "step": 2595 }, { "epoch": 0.2276478462763195, "grad_norm": 0.11669921875, "learning_rate": 0.0028462098360475204, "loss": 1.2821, "step": 2596 }, { "epoch": 0.22773553805069408, "grad_norm": 0.0703125, "learning_rate": 0.0028460208072462715, "loss": 1.284, "step": 2597 }, { "epoch": 0.2278232298250686, "grad_norm": 0.07666015625, "learning_rate": 0.002845831669371013, "loss": 1.2346, "step": 2598 }, { "epoch": 0.22791092159944315, "grad_norm": 0.06787109375, "learning_rate": 0.0028456424224389933, "loss": 1.3562, "step": 2599 }, { "epoch": 0.22799861337381772, "grad_norm": 0.11962890625, "learning_rate": 0.0028454530664674712, "loss": 1.2644, "step": 2600 }, { "epoch": 0.22808630514819225, "grad_norm": 0.06396484375, "learning_rate": 0.0028452636014737152, "loss": 1.2573, "step": 2601 }, { "epoch": 0.2281739969225668, "grad_norm": 0.06201171875, "learning_rate": 0.002845074027475005, "loss": 1.2158, "step": 2602 }, { "epoch": 0.22826168869694133, "grad_norm": 0.059814453125, "learning_rate": 0.0028448843444886275, "loss": 1.2906, "step": 2603 }, { "epoch": 0.2283493804713159, "grad_norm": 0.06640625, "learning_rate": 0.002844694552531883, "loss": 1.2097, "step": 2604 }, { "epoch": 0.22843707224569043, "grad_norm": 0.07861328125, "learning_rate": 0.0028445046516220795, "loss": 1.2526, "step": 2605 }, { "epoch": 0.22852476402006497, "grad_norm": 0.05859375, "learning_rate": 0.0028443146417765353, "loss": 1.2846, "step": 2606 }, { "epoch": 0.2286124557944395, "grad_norm": 0.07568359375, "learning_rate": 0.0028441245230125785, "loss": 1.297, "step": 2607 }, { "epoch": 0.22870014756881407, "grad_norm": 0.060791015625, "learning_rate": 0.002843934295347548, "loss": 1.3344, "step": 2608 }, { "epoch": 0.2287878393431886, "grad_norm": 0.0771484375, "learning_rate": 0.002843743958798792, "loss": 1.3272, "step": 2609 }, { "epoch": 0.22887553111756315, "grad_norm": 0.2041015625, "learning_rate": 0.0028435535133836684, "loss": 1.32, "step": 2610 }, { "epoch": 0.2289632228919377, "grad_norm": 0.2021484375, "learning_rate": 0.0028433629591195454, "loss": 1.2415, "step": 2611 }, { "epoch": 0.22905091466631225, "grad_norm": 0.1337890625, "learning_rate": 0.0028431722960238015, "loss": 1.3351, "step": 2612 }, { "epoch": 0.2291386064406868, "grad_norm": 0.28125, "learning_rate": 0.002842981524113825, "loss": 1.2289, "step": 2613 }, { "epoch": 0.22922629821506132, "grad_norm": 0.2001953125, "learning_rate": 0.002842790643407012, "loss": 1.3334, "step": 2614 }, { "epoch": 0.2293139899894359, "grad_norm": 0.42578125, "learning_rate": 0.0028425996539207716, "loss": 1.2828, "step": 2615 }, { "epoch": 0.22940168176381043, "grad_norm": 0.1259765625, "learning_rate": 0.0028424085556725217, "loss": 1.3462, "step": 2616 }, { "epoch": 0.22948937353818497, "grad_norm": 0.2578125, "learning_rate": 0.00284221734867969, "loss": 1.1924, "step": 2617 }, { "epoch": 0.22957706531255953, "grad_norm": 0.2265625, "learning_rate": 0.0028420260329597136, "loss": 1.2828, "step": 2618 }, { "epoch": 0.22966475708693407, "grad_norm": 0.208984375, "learning_rate": 0.0028418346085300407, "loss": 1.2892, "step": 2619 }, { "epoch": 0.2297524488613086, "grad_norm": 0.2138671875, "learning_rate": 0.0028416430754081273, "loss": 1.3176, "step": 2620 }, { "epoch": 0.22984014063568314, "grad_norm": 0.07861328125, "learning_rate": 0.002841451433611442, "loss": 1.3199, "step": 2621 }, { "epoch": 0.2299278324100577, "grad_norm": 0.271484375, "learning_rate": 0.0028412596831574616, "loss": 1.313, "step": 2622 }, { "epoch": 0.23001552418443225, "grad_norm": 0.10546875, "learning_rate": 0.0028410678240636737, "loss": 1.3054, "step": 2623 }, { "epoch": 0.23010321595880678, "grad_norm": 0.1357421875, "learning_rate": 0.0028408758563475745, "loss": 1.3462, "step": 2624 }, { "epoch": 0.23019090773318132, "grad_norm": 0.126953125, "learning_rate": 0.002840683780026672, "loss": 1.2036, "step": 2625 }, { "epoch": 0.2302785995075559, "grad_norm": 0.09716796875, "learning_rate": 0.0028404915951184824, "loss": 1.2451, "step": 2626 }, { "epoch": 0.23036629128193042, "grad_norm": 0.0888671875, "learning_rate": 0.0028402993016405317, "loss": 1.2752, "step": 2627 }, { "epoch": 0.23045398305630496, "grad_norm": 0.1123046875, "learning_rate": 0.0028401068996103582, "loss": 1.3176, "step": 2628 }, { "epoch": 0.23054167483067953, "grad_norm": 0.11279296875, "learning_rate": 0.0028399143890455074, "loss": 1.32, "step": 2629 }, { "epoch": 0.23062936660505406, "grad_norm": 0.09912109375, "learning_rate": 0.0028397217699635364, "loss": 1.2092, "step": 2630 }, { "epoch": 0.2307170583794286, "grad_norm": 0.059326171875, "learning_rate": 0.002839529042382011, "loss": 1.2997, "step": 2631 }, { "epoch": 0.23080475015380314, "grad_norm": 0.10400390625, "learning_rate": 0.002839336206318508, "loss": 1.2425, "step": 2632 }, { "epoch": 0.2308924419281777, "grad_norm": 0.0712890625, "learning_rate": 0.0028391432617906123, "loss": 1.2509, "step": 2633 }, { "epoch": 0.23098013370255224, "grad_norm": 0.06787109375, "learning_rate": 0.002838950208815922, "loss": 1.271, "step": 2634 }, { "epoch": 0.23106782547692678, "grad_norm": 0.07373046875, "learning_rate": 0.0028387570474120412, "loss": 1.2757, "step": 2635 }, { "epoch": 0.23115551725130135, "grad_norm": 0.06591796875, "learning_rate": 0.0028385637775965866, "loss": 1.2545, "step": 2636 }, { "epoch": 0.23124320902567588, "grad_norm": 0.0634765625, "learning_rate": 0.002838370399387184, "loss": 1.268, "step": 2637 }, { "epoch": 0.23133090080005042, "grad_norm": 0.060302734375, "learning_rate": 0.002838176912801468, "loss": 1.1594, "step": 2638 }, { "epoch": 0.23141859257442496, "grad_norm": 0.0849609375, "learning_rate": 0.002837983317857086, "loss": 1.3115, "step": 2639 }, { "epoch": 0.23150628434879952, "grad_norm": 0.11376953125, "learning_rate": 0.0028377896145716916, "loss": 1.3153, "step": 2640 }, { "epoch": 0.23159397612317406, "grad_norm": 0.08203125, "learning_rate": 0.0028375958029629505, "loss": 1.2658, "step": 2641 }, { "epoch": 0.2316816678975486, "grad_norm": 0.08544921875, "learning_rate": 0.0028374018830485377, "loss": 1.2252, "step": 2642 }, { "epoch": 0.23176935967192314, "grad_norm": 0.06591796875, "learning_rate": 0.0028372078548461388, "loss": 1.2833, "step": 2643 }, { "epoch": 0.2318570514462977, "grad_norm": 0.10498046875, "learning_rate": 0.002837013718373448, "loss": 1.2058, "step": 2644 }, { "epoch": 0.23194474322067224, "grad_norm": 0.07275390625, "learning_rate": 0.0028368194736481706, "loss": 1.3096, "step": 2645 }, { "epoch": 0.23203243499504678, "grad_norm": 0.07861328125, "learning_rate": 0.002836625120688021, "loss": 1.2662, "step": 2646 }, { "epoch": 0.23212012676942134, "grad_norm": 0.06298828125, "learning_rate": 0.002836430659510724, "loss": 1.3126, "step": 2647 }, { "epoch": 0.23220781854379588, "grad_norm": 0.0908203125, "learning_rate": 0.002836236090134013, "loss": 1.2268, "step": 2648 }, { "epoch": 0.23229551031817042, "grad_norm": 0.10302734375, "learning_rate": 0.002836041412575633, "loss": 1.2674, "step": 2649 }, { "epoch": 0.23238320209254495, "grad_norm": 0.076171875, "learning_rate": 0.0028358466268533383, "loss": 1.2746, "step": 2650 }, { "epoch": 0.23247089386691952, "grad_norm": 0.07861328125, "learning_rate": 0.0028356517329848918, "loss": 1.3213, "step": 2651 }, { "epoch": 0.23255858564129406, "grad_norm": 0.0908203125, "learning_rate": 0.0028354567309880683, "loss": 1.3167, "step": 2652 }, { "epoch": 0.2326462774156686, "grad_norm": 0.0859375, "learning_rate": 0.002835261620880651, "loss": 1.2255, "step": 2653 }, { "epoch": 0.23273396919004316, "grad_norm": 0.08154296875, "learning_rate": 0.002835066402680434, "loss": 1.3086, "step": 2654 }, { "epoch": 0.2328216609644177, "grad_norm": 0.142578125, "learning_rate": 0.00283487107640522, "loss": 1.2772, "step": 2655 }, { "epoch": 0.23290935273879224, "grad_norm": 0.07861328125, "learning_rate": 0.002834675642072823, "loss": 1.2289, "step": 2656 }, { "epoch": 0.23299704451316677, "grad_norm": 0.12060546875, "learning_rate": 0.0028344800997010654, "loss": 1.2955, "step": 2657 }, { "epoch": 0.23308473628754134, "grad_norm": 0.10888671875, "learning_rate": 0.00283428444930778, "loss": 1.2663, "step": 2658 }, { "epoch": 0.23317242806191588, "grad_norm": 0.1591796875, "learning_rate": 0.0028340886909108106, "loss": 1.2622, "step": 2659 }, { "epoch": 0.2332601198362904, "grad_norm": 0.244140625, "learning_rate": 0.002833892824528009, "loss": 1.2965, "step": 2660 }, { "epoch": 0.23334781161066495, "grad_norm": 0.054931640625, "learning_rate": 0.002833696850177238, "loss": 1.2658, "step": 2661 }, { "epoch": 0.23343550338503952, "grad_norm": 0.15625, "learning_rate": 0.00283350076787637, "loss": 1.252, "step": 2662 }, { "epoch": 0.23352319515941405, "grad_norm": 0.06396484375, "learning_rate": 0.0028333045776432874, "loss": 1.3064, "step": 2663 }, { "epoch": 0.2336108869337886, "grad_norm": 0.1064453125, "learning_rate": 0.002833108279495882, "loss": 1.2503, "step": 2664 }, { "epoch": 0.23369857870816316, "grad_norm": 0.07861328125, "learning_rate": 0.0028329118734520555, "loss": 1.3426, "step": 2665 }, { "epoch": 0.2337862704825377, "grad_norm": 0.07568359375, "learning_rate": 0.0028327153595297198, "loss": 1.3244, "step": 2666 }, { "epoch": 0.23387396225691223, "grad_norm": 0.07373046875, "learning_rate": 0.002832518737746797, "loss": 1.1724, "step": 2667 }, { "epoch": 0.23396165403128677, "grad_norm": 0.09326171875, "learning_rate": 0.0028323220081212168, "loss": 1.2767, "step": 2668 }, { "epoch": 0.23404934580566134, "grad_norm": 0.10888671875, "learning_rate": 0.0028321251706709223, "loss": 1.2618, "step": 2669 }, { "epoch": 0.23413703758003587, "grad_norm": 0.1376953125, "learning_rate": 0.0028319282254138643, "loss": 1.2485, "step": 2670 }, { "epoch": 0.2342247293544104, "grad_norm": 0.08935546875, "learning_rate": 0.0028317311723680023, "loss": 1.2144, "step": 2671 }, { "epoch": 0.23431242112878498, "grad_norm": 0.1875, "learning_rate": 0.0028315340115513087, "loss": 1.2625, "step": 2672 }, { "epoch": 0.2344001129031595, "grad_norm": 0.07470703125, "learning_rate": 0.002831336742981763, "loss": 1.2906, "step": 2673 }, { "epoch": 0.23448780467753405, "grad_norm": 0.146484375, "learning_rate": 0.0028311393666773563, "loss": 1.2211, "step": 2674 }, { "epoch": 0.2345754964519086, "grad_norm": 0.1259765625, "learning_rate": 0.0028309418826560884, "loss": 1.2899, "step": 2675 }, { "epoch": 0.23466318822628315, "grad_norm": 0.07177734375, "learning_rate": 0.0028307442909359694, "loss": 1.257, "step": 2676 }, { "epoch": 0.2347508800006577, "grad_norm": 0.1337890625, "learning_rate": 0.002830546591535019, "loss": 1.2841, "step": 2677 }, { "epoch": 0.23483857177503223, "grad_norm": 0.062255859375, "learning_rate": 0.002830348784471267, "loss": 1.304, "step": 2678 }, { "epoch": 0.23492626354940677, "grad_norm": 0.1591796875, "learning_rate": 0.0028301508697627527, "loss": 1.2786, "step": 2679 }, { "epoch": 0.23501395532378133, "grad_norm": 0.10546875, "learning_rate": 0.0028299528474275263, "loss": 1.3356, "step": 2680 }, { "epoch": 0.23510164709815587, "grad_norm": 0.11767578125, "learning_rate": 0.0028297547174836457, "loss": 1.2476, "step": 2681 }, { "epoch": 0.2351893388725304, "grad_norm": 0.07861328125, "learning_rate": 0.0028295564799491807, "loss": 1.2715, "step": 2682 }, { "epoch": 0.23527703064690497, "grad_norm": 0.10302734375, "learning_rate": 0.0028293581348422096, "loss": 1.1902, "step": 2683 }, { "epoch": 0.2353647224212795, "grad_norm": 0.07958984375, "learning_rate": 0.002829159682180821, "loss": 1.2296, "step": 2684 }, { "epoch": 0.23545241419565405, "grad_norm": 0.07373046875, "learning_rate": 0.0028289611219831133, "loss": 1.2261, "step": 2685 }, { "epoch": 0.23554010597002858, "grad_norm": 0.076171875, "learning_rate": 0.002828762454267195, "loss": 1.3104, "step": 2686 }, { "epoch": 0.23562779774440315, "grad_norm": 0.06298828125, "learning_rate": 0.002828563679051184, "loss": 1.2957, "step": 2687 }, { "epoch": 0.2357154895187777, "grad_norm": 0.0703125, "learning_rate": 0.002828364796353207, "loss": 1.2526, "step": 2688 }, { "epoch": 0.23580318129315223, "grad_norm": 0.0546875, "learning_rate": 0.0028281658061914032, "loss": 1.2008, "step": 2689 }, { "epoch": 0.2358908730675268, "grad_norm": 0.0751953125, "learning_rate": 0.0028279667085839197, "loss": 1.295, "step": 2690 }, { "epoch": 0.23597856484190133, "grad_norm": 0.0751953125, "learning_rate": 0.0028277675035489128, "loss": 1.238, "step": 2691 }, { "epoch": 0.23606625661627587, "grad_norm": 0.06591796875, "learning_rate": 0.0028275681911045496, "loss": 1.2475, "step": 2692 }, { "epoch": 0.2361539483906504, "grad_norm": 0.0966796875, "learning_rate": 0.002827368771269008, "loss": 1.2757, "step": 2693 }, { "epoch": 0.23624164016502497, "grad_norm": 0.08154296875, "learning_rate": 0.0028271692440604733, "loss": 1.3235, "step": 2694 }, { "epoch": 0.2363293319393995, "grad_norm": 0.1103515625, "learning_rate": 0.0028269696094971423, "loss": 1.2493, "step": 2695 }, { "epoch": 0.23641702371377404, "grad_norm": 0.05615234375, "learning_rate": 0.0028267698675972217, "loss": 1.2719, "step": 2696 }, { "epoch": 0.23650471548814858, "grad_norm": 0.1591796875, "learning_rate": 0.002826570018378927, "loss": 1.1847, "step": 2697 }, { "epoch": 0.23659240726252315, "grad_norm": 0.08056640625, "learning_rate": 0.0028263700618604832, "loss": 1.2694, "step": 2698 }, { "epoch": 0.23668009903689768, "grad_norm": 0.061767578125, "learning_rate": 0.0028261699980601275, "loss": 1.2511, "step": 2699 }, { "epoch": 0.23676779081127222, "grad_norm": 0.06787109375, "learning_rate": 0.0028259698269961033, "loss": 1.2349, "step": 2700 }, { "epoch": 0.2368554825856468, "grad_norm": 0.0859375, "learning_rate": 0.0028257695486866674, "loss": 1.274, "step": 2701 }, { "epoch": 0.23694317436002132, "grad_norm": 0.08837890625, "learning_rate": 0.0028255691631500837, "loss": 1.2792, "step": 2702 }, { "epoch": 0.23703086613439586, "grad_norm": 0.0751953125, "learning_rate": 0.0028253686704046268, "loss": 1.2594, "step": 2703 }, { "epoch": 0.2371185579087704, "grad_norm": 0.0927734375, "learning_rate": 0.0028251680704685814, "loss": 1.3044, "step": 2704 }, { "epoch": 0.23720624968314497, "grad_norm": 0.07177734375, "learning_rate": 0.002824967363360242, "loss": 1.2144, "step": 2705 }, { "epoch": 0.2372939414575195, "grad_norm": 0.06005859375, "learning_rate": 0.002824766549097912, "loss": 1.249, "step": 2706 }, { "epoch": 0.23738163323189404, "grad_norm": 0.06005859375, "learning_rate": 0.0028245656276999053, "loss": 1.2323, "step": 2707 }, { "epoch": 0.23746932500626858, "grad_norm": 0.060791015625, "learning_rate": 0.0028243645991845455, "loss": 1.2755, "step": 2708 }, { "epoch": 0.23755701678064314, "grad_norm": 0.087890625, "learning_rate": 0.002824163463570166, "loss": 1.2667, "step": 2709 }, { "epoch": 0.23764470855501768, "grad_norm": 0.1298828125, "learning_rate": 0.0028239622208751096, "loss": 1.2636, "step": 2710 }, { "epoch": 0.23773240032939222, "grad_norm": 0.06298828125, "learning_rate": 0.002823760871117729, "loss": 1.3619, "step": 2711 }, { "epoch": 0.23782009210376678, "grad_norm": 0.07568359375, "learning_rate": 0.0028235594143163874, "loss": 1.2934, "step": 2712 }, { "epoch": 0.23790778387814132, "grad_norm": 0.06689453125, "learning_rate": 0.0028233578504894565, "loss": 1.2738, "step": 2713 }, { "epoch": 0.23799547565251586, "grad_norm": 0.10791015625, "learning_rate": 0.0028231561796553187, "loss": 1.3069, "step": 2714 }, { "epoch": 0.2380831674268904, "grad_norm": 0.09423828125, "learning_rate": 0.0028229544018323655, "loss": 1.2787, "step": 2715 }, { "epoch": 0.23817085920126496, "grad_norm": 0.1376953125, "learning_rate": 0.0028227525170389993, "loss": 1.2675, "step": 2716 }, { "epoch": 0.2382585509756395, "grad_norm": 0.0810546875, "learning_rate": 0.0028225505252936307, "loss": 1.2909, "step": 2717 }, { "epoch": 0.23834624275001404, "grad_norm": 0.123046875, "learning_rate": 0.0028223484266146816, "loss": 1.2293, "step": 2718 }, { "epoch": 0.2384339345243886, "grad_norm": 0.119140625, "learning_rate": 0.002822146221020582, "loss": 1.3222, "step": 2719 }, { "epoch": 0.23852162629876314, "grad_norm": 0.248046875, "learning_rate": 0.0028219439085297726, "loss": 1.3099, "step": 2720 }, { "epoch": 0.23860931807313768, "grad_norm": 0.0693359375, "learning_rate": 0.0028217414891607046, "loss": 1.291, "step": 2721 }, { "epoch": 0.23869700984751221, "grad_norm": 0.2021484375, "learning_rate": 0.0028215389629318377, "loss": 1.3211, "step": 2722 }, { "epoch": 0.23878470162188678, "grad_norm": 0.16796875, "learning_rate": 0.0028213363298616413, "loss": 1.2889, "step": 2723 }, { "epoch": 0.23887239339626132, "grad_norm": 0.0810546875, "learning_rate": 0.0028211335899685957, "loss": 1.2288, "step": 2724 }, { "epoch": 0.23896008517063586, "grad_norm": 0.119140625, "learning_rate": 0.00282093074327119, "loss": 1.2616, "step": 2725 }, { "epoch": 0.2390477769450104, "grad_norm": 0.07080078125, "learning_rate": 0.002820727789787923, "loss": 1.3769, "step": 2726 }, { "epoch": 0.23913546871938496, "grad_norm": 0.08251953125, "learning_rate": 0.002820524729537304, "loss": 1.2831, "step": 2727 }, { "epoch": 0.2392231604937595, "grad_norm": 0.0712890625, "learning_rate": 0.0028203215625378507, "loss": 1.311, "step": 2728 }, { "epoch": 0.23931085226813403, "grad_norm": 0.0595703125, "learning_rate": 0.0028201182888080933, "loss": 1.2387, "step": 2729 }, { "epoch": 0.2393985440425086, "grad_norm": 0.0595703125, "learning_rate": 0.0028199149083665675, "loss": 1.213, "step": 2730 }, { "epoch": 0.23948623581688314, "grad_norm": 0.06005859375, "learning_rate": 0.0028197114212318223, "loss": 1.3127, "step": 2731 }, { "epoch": 0.23957392759125767, "grad_norm": 0.083984375, "learning_rate": 0.002819507827422416, "loss": 1.2351, "step": 2732 }, { "epoch": 0.2396616193656322, "grad_norm": 0.07177734375, "learning_rate": 0.0028193041269569136, "loss": 1.2884, "step": 2733 }, { "epoch": 0.23974931114000678, "grad_norm": 0.193359375, "learning_rate": 0.0028191003198538944, "loss": 1.2874, "step": 2734 }, { "epoch": 0.23983700291438131, "grad_norm": 0.1689453125, "learning_rate": 0.002818896406131944, "loss": 1.3266, "step": 2735 }, { "epoch": 0.23992469468875585, "grad_norm": 0.0927734375, "learning_rate": 0.0028186923858096583, "loss": 1.2497, "step": 2736 }, { "epoch": 0.24001238646313042, "grad_norm": 0.1748046875, "learning_rate": 0.002818488258905644, "loss": 1.2601, "step": 2737 }, { "epoch": 0.24010007823750495, "grad_norm": 0.0693359375, "learning_rate": 0.0028182840254385175, "loss": 1.2488, "step": 2738 }, { "epoch": 0.2401877700118795, "grad_norm": 0.125, "learning_rate": 0.0028180796854269034, "loss": 1.2787, "step": 2739 }, { "epoch": 0.24027546178625403, "grad_norm": 0.06591796875, "learning_rate": 0.0028178752388894374, "loss": 1.2143, "step": 2740 }, { "epoch": 0.2403631535606286, "grad_norm": 0.10302734375, "learning_rate": 0.0028176706858447646, "loss": 1.2732, "step": 2741 }, { "epoch": 0.24045084533500313, "grad_norm": 0.0654296875, "learning_rate": 0.002817466026311539, "loss": 1.2279, "step": 2742 }, { "epoch": 0.24053853710937767, "grad_norm": 0.12109375, "learning_rate": 0.002817261260308426, "loss": 1.2201, "step": 2743 }, { "epoch": 0.2406262288837522, "grad_norm": 0.10498046875, "learning_rate": 0.0028170563878541, "loss": 1.2774, "step": 2744 }, { "epoch": 0.24071392065812677, "grad_norm": 0.0712890625, "learning_rate": 0.0028168514089672433, "loss": 1.3314, "step": 2745 }, { "epoch": 0.2408016124325013, "grad_norm": 0.060546875, "learning_rate": 0.0028166463236665505, "loss": 1.2837, "step": 2746 }, { "epoch": 0.24088930420687585, "grad_norm": 0.0732421875, "learning_rate": 0.0028164411319707245, "loss": 1.2518, "step": 2747 }, { "epoch": 0.2409769959812504, "grad_norm": 0.08154296875, "learning_rate": 0.002816235833898479, "loss": 1.3564, "step": 2748 }, { "epoch": 0.24106468775562495, "grad_norm": 0.08837890625, "learning_rate": 0.002816030429468535, "loss": 1.2598, "step": 2749 }, { "epoch": 0.2411523795299995, "grad_norm": 0.06298828125, "learning_rate": 0.0028158249186996274, "loss": 1.2119, "step": 2750 }, { "epoch": 0.24124007130437403, "grad_norm": 0.0849609375, "learning_rate": 0.002815619301610496, "loss": 1.3071, "step": 2751 }, { "epoch": 0.2413277630787486, "grad_norm": 0.07861328125, "learning_rate": 0.002815413578219893, "loss": 1.2845, "step": 2752 }, { "epoch": 0.24141545485312313, "grad_norm": 0.1474609375, "learning_rate": 0.0028152077485465807, "loss": 1.2352, "step": 2753 }, { "epoch": 0.24150314662749767, "grad_norm": 0.068359375, "learning_rate": 0.0028150018126093294, "loss": 1.2846, "step": 2754 }, { "epoch": 0.24159083840187223, "grad_norm": 0.2119140625, "learning_rate": 0.0028147957704269208, "loss": 1.2527, "step": 2755 }, { "epoch": 0.24167853017624677, "grad_norm": 0.06982421875, "learning_rate": 0.0028145896220181437, "loss": 1.2897, "step": 2756 }, { "epoch": 0.2417662219506213, "grad_norm": 0.23046875, "learning_rate": 0.0028143833674018006, "loss": 1.3002, "step": 2757 }, { "epoch": 0.24185391372499584, "grad_norm": 0.0673828125, "learning_rate": 0.0028141770065966995, "loss": 1.2977, "step": 2758 }, { "epoch": 0.2419416054993704, "grad_norm": 0.2060546875, "learning_rate": 0.002813970539621661, "loss": 1.2757, "step": 2759 }, { "epoch": 0.24202929727374495, "grad_norm": 0.07373046875, "learning_rate": 0.002813763966495514, "loss": 1.2298, "step": 2760 }, { "epoch": 0.24211698904811949, "grad_norm": 0.08447265625, "learning_rate": 0.0028135572872370973, "loss": 1.2206, "step": 2761 }, { "epoch": 0.24220468082249402, "grad_norm": 0.0732421875, "learning_rate": 0.00281335050186526, "loss": 1.2542, "step": 2762 }, { "epoch": 0.2422923725968686, "grad_norm": 0.07568359375, "learning_rate": 0.00281314361039886, "loss": 1.2576, "step": 2763 }, { "epoch": 0.24238006437124313, "grad_norm": 0.1201171875, "learning_rate": 0.0028129366128567656, "loss": 1.2764, "step": 2764 }, { "epoch": 0.24246775614561766, "grad_norm": 0.057373046875, "learning_rate": 0.002812729509257854, "loss": 1.2704, "step": 2765 }, { "epoch": 0.24255544791999223, "grad_norm": 0.1142578125, "learning_rate": 0.002812522299621013, "loss": 1.2441, "step": 2766 }, { "epoch": 0.24264313969436677, "grad_norm": 0.10107421875, "learning_rate": 0.0028123149839651388, "loss": 1.2537, "step": 2767 }, { "epoch": 0.2427308314687413, "grad_norm": 0.068359375, "learning_rate": 0.002812107562309139, "loss": 1.2722, "step": 2768 }, { "epoch": 0.24281852324311584, "grad_norm": 0.130859375, "learning_rate": 0.002811900034671929, "loss": 1.2949, "step": 2769 }, { "epoch": 0.2429062150174904, "grad_norm": 0.05859375, "learning_rate": 0.0028116924010724354, "loss": 1.2408, "step": 2770 }, { "epoch": 0.24299390679186494, "grad_norm": 0.1611328125, "learning_rate": 0.0028114846615295945, "loss": 1.2719, "step": 2771 }, { "epoch": 0.24308159856623948, "grad_norm": 0.08447265625, "learning_rate": 0.0028112768160623498, "loss": 1.2429, "step": 2772 }, { "epoch": 0.24316929034061405, "grad_norm": 0.12890625, "learning_rate": 0.002811068864689658, "loss": 1.3171, "step": 2773 }, { "epoch": 0.24325698211498858, "grad_norm": 0.12109375, "learning_rate": 0.0028108608074304824, "loss": 1.2857, "step": 2774 }, { "epoch": 0.24334467388936312, "grad_norm": 0.17578125, "learning_rate": 0.0028106526443037985, "loss": 1.2729, "step": 2775 }, { "epoch": 0.24343236566373766, "grad_norm": 0.078125, "learning_rate": 0.002810444375328589, "loss": 1.2549, "step": 2776 }, { "epoch": 0.24352005743811223, "grad_norm": 0.1494140625, "learning_rate": 0.0028102360005238486, "loss": 1.2034, "step": 2777 }, { "epoch": 0.24360774921248676, "grad_norm": 0.06494140625, "learning_rate": 0.0028100275199085803, "loss": 1.299, "step": 2778 }, { "epoch": 0.2436954409868613, "grad_norm": 0.0888671875, "learning_rate": 0.002809818933501796, "loss": 1.1931, "step": 2779 }, { "epoch": 0.24378313276123584, "grad_norm": 0.06103515625, "learning_rate": 0.00280961024132252, "loss": 1.2471, "step": 2780 }, { "epoch": 0.2438708245356104, "grad_norm": 0.06982421875, "learning_rate": 0.002809401443389783, "loss": 1.2117, "step": 2781 }, { "epoch": 0.24395851630998494, "grad_norm": 0.0576171875, "learning_rate": 0.0028091925397226276, "loss": 1.2832, "step": 2782 }, { "epoch": 0.24404620808435948, "grad_norm": 0.07177734375, "learning_rate": 0.0028089835303401044, "loss": 1.3046, "step": 2783 }, { "epoch": 0.24413389985873404, "grad_norm": 0.08544921875, "learning_rate": 0.0028087744152612754, "loss": 1.2234, "step": 2784 }, { "epoch": 0.24422159163310858, "grad_norm": 0.080078125, "learning_rate": 0.0028085651945052113, "loss": 1.3026, "step": 2785 }, { "epoch": 0.24430928340748312, "grad_norm": 0.07568359375, "learning_rate": 0.002808355868090992, "loss": 1.2944, "step": 2786 }, { "epoch": 0.24439697518185766, "grad_norm": 0.099609375, "learning_rate": 0.002808146436037708, "loss": 1.2321, "step": 2787 }, { "epoch": 0.24448466695623222, "grad_norm": 0.0703125, "learning_rate": 0.002807936898364459, "loss": 1.2621, "step": 2788 }, { "epoch": 0.24457235873060676, "grad_norm": 0.07177734375, "learning_rate": 0.0028077272550903537, "loss": 1.2621, "step": 2789 }, { "epoch": 0.2446600505049813, "grad_norm": 0.08251953125, "learning_rate": 0.0028075175062345116, "loss": 1.2554, "step": 2790 }, { "epoch": 0.24474774227935586, "grad_norm": 0.08203125, "learning_rate": 0.002807307651816061, "loss": 1.2361, "step": 2791 }, { "epoch": 0.2448354340537304, "grad_norm": 0.09716796875, "learning_rate": 0.00280709769185414, "loss": 1.3131, "step": 2792 }, { "epoch": 0.24492312582810494, "grad_norm": 0.0576171875, "learning_rate": 0.002806887626367897, "loss": 1.2492, "step": 2793 }, { "epoch": 0.24501081760247947, "grad_norm": 0.08154296875, "learning_rate": 0.0028066774553764887, "loss": 1.2324, "step": 2794 }, { "epoch": 0.24509850937685404, "grad_norm": 0.07080078125, "learning_rate": 0.002806467178899082, "loss": 1.2251, "step": 2795 }, { "epoch": 0.24518620115122858, "grad_norm": 0.09814453125, "learning_rate": 0.0028062567969548546, "loss": 1.2711, "step": 2796 }, { "epoch": 0.24527389292560312, "grad_norm": 0.06591796875, "learning_rate": 0.002806046309562992, "loss": 1.2711, "step": 2797 }, { "epoch": 0.24536158469997765, "grad_norm": 0.11328125, "learning_rate": 0.00280583571674269, "loss": 1.2351, "step": 2798 }, { "epoch": 0.24544927647435222, "grad_norm": 0.1015625, "learning_rate": 0.0028056250185131545, "loss": 1.2993, "step": 2799 }, { "epoch": 0.24553696824872676, "grad_norm": 0.06787109375, "learning_rate": 0.0028054142148936007, "loss": 1.3047, "step": 2800 }, { "epoch": 0.2456246600231013, "grad_norm": 0.0966796875, "learning_rate": 0.002805203305903253, "loss": 1.2383, "step": 2801 }, { "epoch": 0.24571235179747586, "grad_norm": 0.0771484375, "learning_rate": 0.0028049922915613463, "loss": 1.2394, "step": 2802 }, { "epoch": 0.2458000435718504, "grad_norm": 0.109375, "learning_rate": 0.0028047811718871236, "loss": 1.2692, "step": 2803 }, { "epoch": 0.24588773534622493, "grad_norm": 0.197265625, "learning_rate": 0.0028045699468998396, "loss": 1.2898, "step": 2804 }, { "epoch": 0.24597542712059947, "grad_norm": 0.07177734375, "learning_rate": 0.0028043586166187568, "loss": 1.3129, "step": 2805 }, { "epoch": 0.24606311889497404, "grad_norm": 0.3359375, "learning_rate": 0.002804147181063148, "loss": 1.2381, "step": 2806 }, { "epoch": 0.24615081066934857, "grad_norm": 0.1455078125, "learning_rate": 0.002803935640252296, "loss": 1.21, "step": 2807 }, { "epoch": 0.2462385024437231, "grad_norm": 0.259765625, "learning_rate": 0.0028037239942054924, "loss": 1.2379, "step": 2808 }, { "epoch": 0.24632619421809768, "grad_norm": 0.10107421875, "learning_rate": 0.0028035122429420386, "loss": 1.2653, "step": 2809 }, { "epoch": 0.24641388599247221, "grad_norm": 0.181640625, "learning_rate": 0.002803300386481246, "loss": 1.2524, "step": 2810 }, { "epoch": 0.24650157776684675, "grad_norm": 0.080078125, "learning_rate": 0.002803088424842436, "loss": 1.3114, "step": 2811 }, { "epoch": 0.2465892695412213, "grad_norm": 0.162109375, "learning_rate": 0.0028028763580449376, "loss": 1.2442, "step": 2812 }, { "epoch": 0.24667696131559586, "grad_norm": 0.0654296875, "learning_rate": 0.002802664186108092, "loss": 1.2725, "step": 2813 }, { "epoch": 0.2467646530899704, "grad_norm": 0.20703125, "learning_rate": 0.002802451909051248, "loss": 1.3212, "step": 2814 }, { "epoch": 0.24685234486434493, "grad_norm": 0.0703125, "learning_rate": 0.002802239526893765, "loss": 1.2745, "step": 2815 }, { "epoch": 0.24694003663871947, "grad_norm": 0.10400390625, "learning_rate": 0.002802027039655012, "loss": 1.2492, "step": 2816 }, { "epoch": 0.24702772841309403, "grad_norm": 0.07568359375, "learning_rate": 0.002801814447354366, "loss": 1.23, "step": 2817 }, { "epoch": 0.24711542018746857, "grad_norm": 0.08154296875, "learning_rate": 0.0028016017500112162, "loss": 1.2519, "step": 2818 }, { "epoch": 0.2472031119618431, "grad_norm": 0.076171875, "learning_rate": 0.0028013889476449596, "loss": 1.2968, "step": 2819 }, { "epoch": 0.24729080373621767, "grad_norm": 0.11669921875, "learning_rate": 0.0028011760402750037, "loss": 1.2652, "step": 2820 }, { "epoch": 0.2473784955105922, "grad_norm": 0.1201171875, "learning_rate": 0.0028009630279207643, "loss": 1.2984, "step": 2821 }, { "epoch": 0.24746618728496675, "grad_norm": 0.06982421875, "learning_rate": 0.002800749910601668, "loss": 1.2632, "step": 2822 }, { "epoch": 0.2475538790593413, "grad_norm": 0.07763671875, "learning_rate": 0.00280053668833715, "loss": 1.2903, "step": 2823 }, { "epoch": 0.24764157083371585, "grad_norm": 0.078125, "learning_rate": 0.0028003233611466572, "loss": 1.2712, "step": 2824 }, { "epoch": 0.2477292626080904, "grad_norm": 0.06982421875, "learning_rate": 0.0028001099290496426, "loss": 1.2612, "step": 2825 }, { "epoch": 0.24781695438246493, "grad_norm": 0.06591796875, "learning_rate": 0.0027998963920655715, "loss": 1.2834, "step": 2826 }, { "epoch": 0.2479046461568395, "grad_norm": 0.10009765625, "learning_rate": 0.0027996827502139183, "loss": 1.2871, "step": 2827 }, { "epoch": 0.24799233793121403, "grad_norm": 0.1357421875, "learning_rate": 0.002799469003514166, "loss": 1.2606, "step": 2828 }, { "epoch": 0.24808002970558857, "grad_norm": 0.12890625, "learning_rate": 0.0027992551519858077, "loss": 1.2947, "step": 2829 }, { "epoch": 0.2481677214799631, "grad_norm": 0.10498046875, "learning_rate": 0.0027990411956483464, "loss": 1.2834, "step": 2830 }, { "epoch": 0.24825541325433767, "grad_norm": 0.265625, "learning_rate": 0.0027988271345212945, "loss": 1.2868, "step": 2831 }, { "epoch": 0.2483431050287122, "grad_norm": 0.064453125, "learning_rate": 0.0027986129686241737, "loss": 1.2753, "step": 2832 }, { "epoch": 0.24843079680308675, "grad_norm": 0.1865234375, "learning_rate": 0.0027983986979765152, "loss": 1.2711, "step": 2833 }, { "epoch": 0.24851848857746128, "grad_norm": 0.08203125, "learning_rate": 0.00279818432259786, "loss": 1.2634, "step": 2834 }, { "epoch": 0.24860618035183585, "grad_norm": 0.10302734375, "learning_rate": 0.0027979698425077584, "loss": 1.3294, "step": 2835 }, { "epoch": 0.24869387212621039, "grad_norm": 0.06494140625, "learning_rate": 0.0027977552577257707, "loss": 1.303, "step": 2836 }, { "epoch": 0.24878156390058492, "grad_norm": 0.0966796875, "learning_rate": 0.0027975405682714666, "loss": 1.2643, "step": 2837 }, { "epoch": 0.2488692556749595, "grad_norm": 0.08349609375, "learning_rate": 0.0027973257741644247, "loss": 1.2126, "step": 2838 }, { "epoch": 0.24895694744933403, "grad_norm": 0.06396484375, "learning_rate": 0.002797110875424234, "loss": 1.261, "step": 2839 }, { "epoch": 0.24904463922370856, "grad_norm": 0.11083984375, "learning_rate": 0.0027968958720704937, "loss": 1.2632, "step": 2840 }, { "epoch": 0.2491323309980831, "grad_norm": 0.095703125, "learning_rate": 0.0027966807641228095, "loss": 1.2804, "step": 2841 }, { "epoch": 0.24922002277245767, "grad_norm": 0.05810546875, "learning_rate": 0.0027964655516008, "loss": 1.2962, "step": 2842 }, { "epoch": 0.2493077145468322, "grad_norm": 0.09521484375, "learning_rate": 0.0027962502345240917, "loss": 1.2059, "step": 2843 }, { "epoch": 0.24939540632120674, "grad_norm": 0.064453125, "learning_rate": 0.002796034812912321, "loss": 1.3004, "step": 2844 }, { "epoch": 0.2494830980955813, "grad_norm": 0.1484375, "learning_rate": 0.002795819286785134, "loss": 1.2303, "step": 2845 }, { "epoch": 0.24957078986995584, "grad_norm": 0.05615234375, "learning_rate": 0.0027956036561621865, "loss": 1.2497, "step": 2846 }, { "epoch": 0.24965848164433038, "grad_norm": 0.1552734375, "learning_rate": 0.0027953879210631423, "loss": 1.3764, "step": 2847 }, { "epoch": 0.24974617341870492, "grad_norm": 0.12890625, "learning_rate": 0.0027951720815076763, "loss": 1.1885, "step": 2848 }, { "epoch": 0.24983386519307949, "grad_norm": 0.10107421875, "learning_rate": 0.002794956137515473, "loss": 1.2524, "step": 2849 }, { "epoch": 0.24992155696745402, "grad_norm": 0.126953125, "learning_rate": 0.002794740089106226, "loss": 1.2795, "step": 2850 }, { "epoch": 0.25000924874182856, "grad_norm": 0.0830078125, "learning_rate": 0.0027945239362996374, "loss": 1.229, "step": 2851 }, { "epoch": 0.2500969405162031, "grad_norm": 0.1171875, "learning_rate": 0.0027943076791154204, "loss": 1.2629, "step": 2852 }, { "epoch": 0.25018463229057764, "grad_norm": 0.09423828125, "learning_rate": 0.0027940913175732974, "loss": 1.2772, "step": 2853 }, { "epoch": 0.2502723240649522, "grad_norm": 0.06787109375, "learning_rate": 0.0027938748516929996, "loss": 1.2884, "step": 2854 }, { "epoch": 0.25036001583932677, "grad_norm": 0.10400390625, "learning_rate": 0.0027936582814942685, "loss": 1.2679, "step": 2855 }, { "epoch": 0.2504477076137013, "grad_norm": 0.0712890625, "learning_rate": 0.002793441606996854, "loss": 1.237, "step": 2856 }, { "epoch": 0.25053539938807584, "grad_norm": 0.08984375, "learning_rate": 0.002793224828220517, "loss": 1.239, "step": 2857 }, { "epoch": 0.2506230911624504, "grad_norm": 0.08642578125, "learning_rate": 0.0027930079451850267, "loss": 1.2561, "step": 2858 }, { "epoch": 0.2507107829368249, "grad_norm": 0.11474609375, "learning_rate": 0.0027927909579101625, "loss": 1.3079, "step": 2859 }, { "epoch": 0.2507984747111995, "grad_norm": 0.18359375, "learning_rate": 0.002792573866415713, "loss": 1.2583, "step": 2860 }, { "epoch": 0.250886166485574, "grad_norm": 0.0634765625, "learning_rate": 0.002792356670721477, "loss": 1.2411, "step": 2861 }, { "epoch": 0.25097385825994856, "grad_norm": 0.1298828125, "learning_rate": 0.002792139370847261, "loss": 1.2707, "step": 2862 }, { "epoch": 0.2510615500343231, "grad_norm": 0.06787109375, "learning_rate": 0.002791921966812883, "loss": 1.208, "step": 2863 }, { "epoch": 0.25114924180869763, "grad_norm": 0.08349609375, "learning_rate": 0.0027917044586381694, "loss": 1.2308, "step": 2864 }, { "epoch": 0.2512369335830722, "grad_norm": 0.0634765625, "learning_rate": 0.0027914868463429564, "loss": 1.278, "step": 2865 }, { "epoch": 0.25132462535744676, "grad_norm": 0.0791015625, "learning_rate": 0.00279126912994709, "loss": 1.2354, "step": 2866 }, { "epoch": 0.2514123171318213, "grad_norm": 0.06396484375, "learning_rate": 0.0027910513094704247, "loss": 1.2812, "step": 2867 }, { "epoch": 0.25150000890619584, "grad_norm": 0.08447265625, "learning_rate": 0.002790833384932826, "loss": 1.3067, "step": 2868 }, { "epoch": 0.2515877006805704, "grad_norm": 0.060302734375, "learning_rate": 0.0027906153563541673, "loss": 1.2382, "step": 2869 }, { "epoch": 0.2516753924549449, "grad_norm": 0.1376953125, "learning_rate": 0.002790397223754333, "loss": 1.2508, "step": 2870 }, { "epoch": 0.2517630842293195, "grad_norm": 0.076171875, "learning_rate": 0.0027901789871532154, "loss": 1.2781, "step": 2871 }, { "epoch": 0.25185077600369404, "grad_norm": 0.08349609375, "learning_rate": 0.0027899606465707177, "loss": 1.2556, "step": 2872 }, { "epoch": 0.25193846777806855, "grad_norm": 0.115234375, "learning_rate": 0.0027897422020267512, "loss": 1.2541, "step": 2873 }, { "epoch": 0.2520261595524431, "grad_norm": 0.0595703125, "learning_rate": 0.002789523653541239, "loss": 1.2664, "step": 2874 }, { "epoch": 0.25211385132681763, "grad_norm": 0.134765625, "learning_rate": 0.002789305001134111, "loss": 1.3111, "step": 2875 }, { "epoch": 0.2522015431011922, "grad_norm": 0.061767578125, "learning_rate": 0.0027890862448253085, "loss": 1.2506, "step": 2876 }, { "epoch": 0.25228923487556676, "grad_norm": 0.125, "learning_rate": 0.00278886738463478, "loss": 1.2468, "step": 2877 }, { "epoch": 0.25237692664994127, "grad_norm": 0.06787109375, "learning_rate": 0.0027886484205824867, "loss": 1.2409, "step": 2878 }, { "epoch": 0.25246461842431583, "grad_norm": 0.078125, "learning_rate": 0.002788429352688397, "loss": 1.2936, "step": 2879 }, { "epoch": 0.2525523101986904, "grad_norm": 0.06787109375, "learning_rate": 0.0027882101809724885, "loss": 1.2666, "step": 2880 }, { "epoch": 0.2526400019730649, "grad_norm": 0.07275390625, "learning_rate": 0.00278799090545475, "loss": 1.3166, "step": 2881 }, { "epoch": 0.2527276937474395, "grad_norm": 0.0947265625, "learning_rate": 0.002787771526155179, "loss": 1.2362, "step": 2882 }, { "epoch": 0.25281538552181404, "grad_norm": 0.0556640625, "learning_rate": 0.0027875520430937816, "loss": 1.2127, "step": 2883 }, { "epoch": 0.25290307729618855, "grad_norm": 0.064453125, "learning_rate": 0.0027873324562905743, "loss": 1.2095, "step": 2884 }, { "epoch": 0.2529907690705631, "grad_norm": 0.06640625, "learning_rate": 0.0027871127657655833, "loss": 1.2058, "step": 2885 }, { "epoch": 0.2530784608449376, "grad_norm": 0.0615234375, "learning_rate": 0.0027868929715388433, "loss": 1.2894, "step": 2886 }, { "epoch": 0.2531661526193122, "grad_norm": 0.08642578125, "learning_rate": 0.0027866730736303994, "loss": 1.2828, "step": 2887 }, { "epoch": 0.25325384439368676, "grad_norm": 0.08984375, "learning_rate": 0.0027864530720603056, "loss": 1.2565, "step": 2888 }, { "epoch": 0.25334153616806127, "grad_norm": 0.09033203125, "learning_rate": 0.0027862329668486246, "loss": 1.2625, "step": 2889 }, { "epoch": 0.25342922794243583, "grad_norm": 0.068359375, "learning_rate": 0.0027860127580154313, "loss": 1.259, "step": 2890 }, { "epoch": 0.2535169197168104, "grad_norm": 0.0927734375, "learning_rate": 0.0027857924455808062, "loss": 1.2435, "step": 2891 }, { "epoch": 0.2536046114911849, "grad_norm": 0.1220703125, "learning_rate": 0.0027855720295648424, "loss": 1.2101, "step": 2892 }, { "epoch": 0.25369230326555947, "grad_norm": 0.06298828125, "learning_rate": 0.002785351509987641, "loss": 1.344, "step": 2893 }, { "epoch": 0.25377999503993404, "grad_norm": 0.099609375, "learning_rate": 0.0027851308868693126, "loss": 1.3609, "step": 2894 }, { "epoch": 0.25386768681430855, "grad_norm": 0.061767578125, "learning_rate": 0.002784910160229978, "loss": 1.2355, "step": 2895 }, { "epoch": 0.2539553785886831, "grad_norm": 0.078125, "learning_rate": 0.002784689330089766, "loss": 1.2644, "step": 2896 }, { "epoch": 0.2540430703630576, "grad_norm": 0.1318359375, "learning_rate": 0.002784468396468817, "loss": 1.3094, "step": 2897 }, { "epoch": 0.2541307621374322, "grad_norm": 0.08203125, "learning_rate": 0.0027842473593872783, "loss": 1.2902, "step": 2898 }, { "epoch": 0.25421845391180675, "grad_norm": 0.1767578125, "learning_rate": 0.002784026218865309, "loss": 1.2248, "step": 2899 }, { "epoch": 0.25430614568618126, "grad_norm": 0.10791015625, "learning_rate": 0.0027838049749230754, "loss": 1.2901, "step": 2900 }, { "epoch": 0.2543938374605558, "grad_norm": 0.1181640625, "learning_rate": 0.0027835836275807552, "loss": 1.1911, "step": 2901 }, { "epoch": 0.2544815292349304, "grad_norm": 0.11865234375, "learning_rate": 0.0027833621768585345, "loss": 1.2482, "step": 2902 }, { "epoch": 0.2545692210093049, "grad_norm": 0.08056640625, "learning_rate": 0.002783140622776609, "loss": 1.259, "step": 2903 }, { "epoch": 0.25465691278367947, "grad_norm": 0.107421875, "learning_rate": 0.0027829189653551844, "loss": 1.2409, "step": 2904 }, { "epoch": 0.25474460455805403, "grad_norm": 0.130859375, "learning_rate": 0.002782697204614474, "loss": 1.3277, "step": 2905 }, { "epoch": 0.25483229633242854, "grad_norm": 0.125, "learning_rate": 0.0027824753405747025, "loss": 1.2635, "step": 2906 }, { "epoch": 0.2549199881068031, "grad_norm": 0.1435546875, "learning_rate": 0.002782253373256104, "loss": 1.3402, "step": 2907 }, { "epoch": 0.2550076798811776, "grad_norm": 0.25390625, "learning_rate": 0.002782031302678921, "loss": 1.2706, "step": 2908 }, { "epoch": 0.2550953716555522, "grad_norm": 0.0751953125, "learning_rate": 0.002781809128863405, "loss": 1.2432, "step": 2909 }, { "epoch": 0.25518306342992675, "grad_norm": 0.236328125, "learning_rate": 0.002781586851829818, "loss": 1.2845, "step": 2910 }, { "epoch": 0.25527075520430126, "grad_norm": 0.10546875, "learning_rate": 0.002781364471598432, "loss": 1.2617, "step": 2911 }, { "epoch": 0.2553584469786758, "grad_norm": 0.232421875, "learning_rate": 0.0027811419881895263, "loss": 1.3065, "step": 2912 }, { "epoch": 0.2554461387530504, "grad_norm": 0.103515625, "learning_rate": 0.002780919401623391, "loss": 1.288, "step": 2913 }, { "epoch": 0.2555338305274249, "grad_norm": 0.171875, "learning_rate": 0.0027806967119203265, "loss": 1.2696, "step": 2914 }, { "epoch": 0.25562152230179946, "grad_norm": 0.1484375, "learning_rate": 0.0027804739191006405, "loss": 1.2681, "step": 2915 }, { "epoch": 0.25570921407617403, "grad_norm": 0.146484375, "learning_rate": 0.002780251023184651, "loss": 1.2336, "step": 2916 }, { "epoch": 0.25579690585054854, "grad_norm": 0.17578125, "learning_rate": 0.0027800280241926864, "loss": 1.2208, "step": 2917 }, { "epoch": 0.2558845976249231, "grad_norm": 0.0810546875, "learning_rate": 0.002779804922145083, "loss": 1.2677, "step": 2918 }, { "epoch": 0.25597228939929767, "grad_norm": 0.1943359375, "learning_rate": 0.0027795817170621867, "loss": 1.2556, "step": 2919 }, { "epoch": 0.2560599811736722, "grad_norm": 0.060546875, "learning_rate": 0.0027793584089643546, "loss": 1.3013, "step": 2920 }, { "epoch": 0.25614767294804675, "grad_norm": 0.181640625, "learning_rate": 0.0027791349978719504, "loss": 1.2536, "step": 2921 }, { "epoch": 0.25623536472242125, "grad_norm": 0.1171875, "learning_rate": 0.0027789114838053497, "loss": 1.2655, "step": 2922 }, { "epoch": 0.2563230564967958, "grad_norm": 0.2275390625, "learning_rate": 0.0027786878667849357, "loss": 1.2595, "step": 2923 }, { "epoch": 0.2564107482711704, "grad_norm": 0.1328125, "learning_rate": 0.0027784641468311024, "loss": 1.3043, "step": 2924 }, { "epoch": 0.2564984400455449, "grad_norm": 0.1484375, "learning_rate": 0.0027782403239642517, "loss": 1.273, "step": 2925 }, { "epoch": 0.25658613181991946, "grad_norm": 0.1591796875, "learning_rate": 0.002778016398204796, "loss": 1.291, "step": 2926 }, { "epoch": 0.256673823594294, "grad_norm": 0.18359375, "learning_rate": 0.0027777923695731566, "loss": 1.2489, "step": 2927 }, { "epoch": 0.25676151536866854, "grad_norm": 0.08837890625, "learning_rate": 0.0027775682380897647, "loss": 1.3259, "step": 2928 }, { "epoch": 0.2568492071430431, "grad_norm": 0.1396484375, "learning_rate": 0.00277734400377506, "loss": 1.2456, "step": 2929 }, { "epoch": 0.25693689891741767, "grad_norm": 0.1044921875, "learning_rate": 0.0027771196666494928, "loss": 1.2707, "step": 2930 }, { "epoch": 0.2570245906917922, "grad_norm": 0.0693359375, "learning_rate": 0.0027768952267335214, "loss": 1.2144, "step": 2931 }, { "epoch": 0.25711228246616674, "grad_norm": 0.1875, "learning_rate": 0.002776670684047614, "loss": 1.2658, "step": 2932 }, { "epoch": 0.25719997424054125, "grad_norm": 0.0791015625, "learning_rate": 0.0027764460386122494, "loss": 1.1995, "step": 2933 }, { "epoch": 0.2572876660149158, "grad_norm": 0.17578125, "learning_rate": 0.0027762212904479137, "loss": 1.2849, "step": 2934 }, { "epoch": 0.2573753577892904, "grad_norm": 0.08251953125, "learning_rate": 0.002775996439575103, "loss": 1.2465, "step": 2935 }, { "epoch": 0.2574630495636649, "grad_norm": 0.1142578125, "learning_rate": 0.0027757714860143245, "loss": 1.2832, "step": 2936 }, { "epoch": 0.25755074133803946, "grad_norm": 0.0849609375, "learning_rate": 0.002775546429786093, "loss": 1.258, "step": 2937 }, { "epoch": 0.257638433112414, "grad_norm": 0.08154296875, "learning_rate": 0.0027753212709109316, "loss": 1.3144, "step": 2938 }, { "epoch": 0.25772612488678853, "grad_norm": 0.10595703125, "learning_rate": 0.0027750960094093764, "loss": 1.1999, "step": 2939 }, { "epoch": 0.2578138166611631, "grad_norm": 0.06298828125, "learning_rate": 0.002774870645301969, "loss": 1.2194, "step": 2940 }, { "epoch": 0.25790150843553766, "grad_norm": 0.09619140625, "learning_rate": 0.0027746451786092624, "loss": 1.3548, "step": 2941 }, { "epoch": 0.2579892002099122, "grad_norm": 0.06201171875, "learning_rate": 0.0027744196093518188, "loss": 1.2756, "step": 2942 }, { "epoch": 0.25807689198428674, "grad_norm": 0.08056640625, "learning_rate": 0.0027741939375502097, "loss": 1.2912, "step": 2943 }, { "epoch": 0.25816458375866125, "grad_norm": 0.09033203125, "learning_rate": 0.0027739681632250155, "loss": 1.2961, "step": 2944 }, { "epoch": 0.2582522755330358, "grad_norm": 0.0859375, "learning_rate": 0.002773742286396827, "loss": 1.2394, "step": 2945 }, { "epoch": 0.2583399673074104, "grad_norm": 0.11083984375, "learning_rate": 0.002773516307086242, "loss": 1.2804, "step": 2946 }, { "epoch": 0.2584276590817849, "grad_norm": 0.06884765625, "learning_rate": 0.0027732902253138707, "loss": 1.3029, "step": 2947 }, { "epoch": 0.25851535085615945, "grad_norm": 0.1494140625, "learning_rate": 0.00277306404110033, "loss": 1.2285, "step": 2948 }, { "epoch": 0.258603042630534, "grad_norm": 0.068359375, "learning_rate": 0.0027728377544662484, "loss": 1.2661, "step": 2949 }, { "epoch": 0.25869073440490853, "grad_norm": 0.11474609375, "learning_rate": 0.0027726113654322616, "loss": 1.1898, "step": 2950 }, { "epoch": 0.2587784261792831, "grad_norm": 0.0615234375, "learning_rate": 0.002772384874019017, "loss": 1.2992, "step": 2951 }, { "epoch": 0.25886611795365766, "grad_norm": 0.158203125, "learning_rate": 0.0027721582802471695, "loss": 1.2735, "step": 2952 }, { "epoch": 0.25895380972803217, "grad_norm": 0.05859375, "learning_rate": 0.0027719315841373826, "loss": 1.1913, "step": 2953 }, { "epoch": 0.25904150150240673, "grad_norm": 0.1533203125, "learning_rate": 0.0027717047857103326, "loss": 1.2928, "step": 2954 }, { "epoch": 0.2591291932767813, "grad_norm": 0.06689453125, "learning_rate": 0.0027714778849867016, "loss": 1.1701, "step": 2955 }, { "epoch": 0.2592168850511558, "grad_norm": 0.095703125, "learning_rate": 0.002771250881987182, "loss": 1.3192, "step": 2956 }, { "epoch": 0.2593045768255304, "grad_norm": 0.06689453125, "learning_rate": 0.002771023776732477, "loss": 1.229, "step": 2957 }, { "epoch": 0.2593922685999049, "grad_norm": 0.0947265625, "learning_rate": 0.002770796569243297, "loss": 1.2868, "step": 2958 }, { "epoch": 0.25947996037427945, "grad_norm": 0.07421875, "learning_rate": 0.0027705692595403632, "loss": 1.2352, "step": 2959 }, { "epoch": 0.259567652148654, "grad_norm": 0.09619140625, "learning_rate": 0.002770341847644406, "loss": 1.2917, "step": 2960 }, { "epoch": 0.2596553439230285, "grad_norm": 0.10400390625, "learning_rate": 0.0027701143335761643, "loss": 1.2669, "step": 2961 }, { "epoch": 0.2597430356974031, "grad_norm": 0.0537109375, "learning_rate": 0.002769886717356387, "loss": 1.2244, "step": 2962 }, { "epoch": 0.25983072747177766, "grad_norm": 0.07421875, "learning_rate": 0.002769658999005832, "loss": 1.2034, "step": 2963 }, { "epoch": 0.25991841924615217, "grad_norm": 0.09716796875, "learning_rate": 0.0027694311785452673, "loss": 1.2661, "step": 2964 }, { "epoch": 0.26000611102052673, "grad_norm": 0.12109375, "learning_rate": 0.002769203255995468, "loss": 1.2605, "step": 2965 }, { "epoch": 0.2600938027949013, "grad_norm": 0.05859375, "learning_rate": 0.002768975231377221, "loss": 1.3732, "step": 2966 }, { "epoch": 0.2601814945692758, "grad_norm": 0.0732421875, "learning_rate": 0.002768747104711322, "loss": 1.216, "step": 2967 }, { "epoch": 0.26026918634365037, "grad_norm": 0.072265625, "learning_rate": 0.0027685188760185747, "loss": 1.2962, "step": 2968 }, { "epoch": 0.2603568781180249, "grad_norm": 0.0869140625, "learning_rate": 0.0027682905453197937, "loss": 1.2969, "step": 2969 }, { "epoch": 0.26044456989239945, "grad_norm": 0.11572265625, "learning_rate": 0.0027680621126358017, "loss": 1.3167, "step": 2970 }, { "epoch": 0.260532261666774, "grad_norm": 0.08056640625, "learning_rate": 0.002767833577987431, "loss": 1.2855, "step": 2971 }, { "epoch": 0.2606199534411485, "grad_norm": 0.05859375, "learning_rate": 0.0027676049413955244, "loss": 1.2864, "step": 2972 }, { "epoch": 0.2607076452155231, "grad_norm": 0.0634765625, "learning_rate": 0.002767376202880931, "loss": 1.2312, "step": 2973 }, { "epoch": 0.26079533698989765, "grad_norm": 0.06103515625, "learning_rate": 0.002767147362464514, "loss": 1.2635, "step": 2974 }, { "epoch": 0.26088302876427216, "grad_norm": 0.0712890625, "learning_rate": 0.0027669184201671395, "loss": 1.2771, "step": 2975 }, { "epoch": 0.2609707205386467, "grad_norm": 0.0654296875, "learning_rate": 0.0027666893760096897, "loss": 1.2531, "step": 2976 }, { "epoch": 0.2610584123130213, "grad_norm": 0.080078125, "learning_rate": 0.0027664602300130517, "loss": 1.2772, "step": 2977 }, { "epoch": 0.2611461040873958, "grad_norm": 0.0751953125, "learning_rate": 0.0027662309821981225, "loss": 1.2692, "step": 2978 }, { "epoch": 0.26123379586177037, "grad_norm": 0.052978515625, "learning_rate": 0.002766001632585809, "loss": 1.2461, "step": 2979 }, { "epoch": 0.2613214876361449, "grad_norm": 0.080078125, "learning_rate": 0.0027657721811970275, "loss": 1.2318, "step": 2980 }, { "epoch": 0.26140917941051944, "grad_norm": 0.06103515625, "learning_rate": 0.0027655426280527038, "loss": 1.2424, "step": 2981 }, { "epoch": 0.261496871184894, "grad_norm": 0.052001953125, "learning_rate": 0.0027653129731737723, "loss": 1.2721, "step": 2982 }, { "epoch": 0.2615845629592685, "grad_norm": 0.10107421875, "learning_rate": 0.0027650832165811764, "loss": 1.2572, "step": 2983 }, { "epoch": 0.2616722547336431, "grad_norm": 0.06689453125, "learning_rate": 0.0027648533582958702, "loss": 1.253, "step": 2984 }, { "epoch": 0.26175994650801765, "grad_norm": 0.06640625, "learning_rate": 0.002764623398338815, "loss": 1.272, "step": 2985 }, { "epoch": 0.26184763828239216, "grad_norm": 0.0830078125, "learning_rate": 0.0027643933367309838, "loss": 1.2042, "step": 2986 }, { "epoch": 0.2619353300567667, "grad_norm": 0.07470703125, "learning_rate": 0.0027641631734933564, "loss": 1.3408, "step": 2987 }, { "epoch": 0.2620230218311413, "grad_norm": 0.103515625, "learning_rate": 0.0027639329086469244, "loss": 1.2538, "step": 2988 }, { "epoch": 0.2621107136055158, "grad_norm": 0.07373046875, "learning_rate": 0.002763702542212687, "loss": 1.2553, "step": 2989 }, { "epoch": 0.26219840537989036, "grad_norm": 0.08154296875, "learning_rate": 0.002763472074211652, "loss": 1.2925, "step": 2990 }, { "epoch": 0.26228609715426493, "grad_norm": 0.12158203125, "learning_rate": 0.0027632415046648386, "loss": 1.2794, "step": 2991 }, { "epoch": 0.26237378892863944, "grad_norm": 0.0751953125, "learning_rate": 0.0027630108335932745, "loss": 1.2362, "step": 2992 }, { "epoch": 0.262461480703014, "grad_norm": 0.205078125, "learning_rate": 0.0027627800610179943, "loss": 1.2579, "step": 2993 }, { "epoch": 0.2625491724773885, "grad_norm": 0.0986328125, "learning_rate": 0.002762549186960046, "loss": 1.2687, "step": 2994 }, { "epoch": 0.2626368642517631, "grad_norm": 0.07470703125, "learning_rate": 0.0027623182114404838, "loss": 1.2331, "step": 2995 }, { "epoch": 0.26272455602613765, "grad_norm": 0.07861328125, "learning_rate": 0.0027620871344803716, "loss": 1.2568, "step": 2996 }, { "epoch": 0.26281224780051216, "grad_norm": 0.0966796875, "learning_rate": 0.0027618559561007837, "loss": 1.2477, "step": 2997 }, { "epoch": 0.2628999395748867, "grad_norm": 0.08837890625, "learning_rate": 0.0027616246763228036, "loss": 1.2465, "step": 2998 }, { "epoch": 0.2629876313492613, "grad_norm": 0.09765625, "learning_rate": 0.0027613932951675224, "loss": 1.2742, "step": 2999 }, { "epoch": 0.2630753231236358, "grad_norm": 0.1416015625, "learning_rate": 0.0027611618126560417, "loss": 1.2651, "step": 3000 }, { "epoch": 0.2630753231236358, "eval_loss": 1.272948145866394, "eval_runtime": 429.2563, "eval_samples_per_second": 33.656, "eval_steps_per_second": 8.415, "step": 3000 }, { "epoch": 0.26316301489801036, "grad_norm": 0.07421875, "learning_rate": 0.002760930228809472, "loss": 1.3404, "step": 3001 }, { "epoch": 0.2632507066723849, "grad_norm": 0.166015625, "learning_rate": 0.002760698543648933, "loss": 1.248, "step": 3002 }, { "epoch": 0.26333839844675944, "grad_norm": 0.08837890625, "learning_rate": 0.002760466757195554, "loss": 1.2962, "step": 3003 }, { "epoch": 0.263426090221134, "grad_norm": 0.09912109375, "learning_rate": 0.0027602348694704738, "loss": 1.2266, "step": 3004 }, { "epoch": 0.2635137819955085, "grad_norm": 0.1064453125, "learning_rate": 0.0027600028804948395, "loss": 1.3335, "step": 3005 }, { "epoch": 0.2636014737698831, "grad_norm": 0.12353515625, "learning_rate": 0.0027597707902898084, "loss": 1.2754, "step": 3006 }, { "epoch": 0.26368916554425764, "grad_norm": 0.1865234375, "learning_rate": 0.002759538598876545, "loss": 1.2434, "step": 3007 }, { "epoch": 0.26377685731863215, "grad_norm": 0.068359375, "learning_rate": 0.0027593063062762266, "loss": 1.2797, "step": 3008 }, { "epoch": 0.2638645490930067, "grad_norm": 0.1904296875, "learning_rate": 0.002759073912510036, "loss": 1.3289, "step": 3009 }, { "epoch": 0.2639522408673813, "grad_norm": 0.154296875, "learning_rate": 0.002758841417599168, "loss": 1.2697, "step": 3010 }, { "epoch": 0.2640399326417558, "grad_norm": 0.0859375, "learning_rate": 0.0027586088215648254, "loss": 1.2396, "step": 3011 }, { "epoch": 0.26412762441613036, "grad_norm": 0.15625, "learning_rate": 0.00275837612442822, "loss": 1.3153, "step": 3012 }, { "epoch": 0.2642153161905049, "grad_norm": 0.09765625, "learning_rate": 0.002758143326210573, "loss": 1.237, "step": 3013 }, { "epoch": 0.26430300796487943, "grad_norm": 0.111328125, "learning_rate": 0.0027579104269331157, "loss": 1.2794, "step": 3014 }, { "epoch": 0.264390699739254, "grad_norm": 0.166015625, "learning_rate": 0.0027576774266170876, "loss": 1.2634, "step": 3015 }, { "epoch": 0.2644783915136285, "grad_norm": 0.0615234375, "learning_rate": 0.0027574443252837374, "loss": 1.2334, "step": 3016 }, { "epoch": 0.2645660832880031, "grad_norm": 0.11376953125, "learning_rate": 0.0027572111229543232, "loss": 1.2214, "step": 3017 }, { "epoch": 0.26465377506237764, "grad_norm": 0.0615234375, "learning_rate": 0.0027569778196501137, "loss": 1.2419, "step": 3018 }, { "epoch": 0.26474146683675215, "grad_norm": 0.09521484375, "learning_rate": 0.0027567444153923843, "loss": 1.281, "step": 3019 }, { "epoch": 0.2648291586111267, "grad_norm": 0.055419921875, "learning_rate": 0.0027565109102024206, "loss": 1.2851, "step": 3020 }, { "epoch": 0.2649168503855013, "grad_norm": 0.072265625, "learning_rate": 0.0027562773041015193, "loss": 1.2762, "step": 3021 }, { "epoch": 0.2650045421598758, "grad_norm": 0.06982421875, "learning_rate": 0.002756043597110983, "loss": 1.2491, "step": 3022 }, { "epoch": 0.26509223393425035, "grad_norm": 0.07421875, "learning_rate": 0.0027558097892521265, "loss": 1.3058, "step": 3023 }, { "epoch": 0.2651799257086249, "grad_norm": 0.123046875, "learning_rate": 0.002755575880546272, "loss": 1.2851, "step": 3024 }, { "epoch": 0.26526761748299943, "grad_norm": 0.06982421875, "learning_rate": 0.0027553418710147505, "loss": 1.2856, "step": 3025 }, { "epoch": 0.265355309257374, "grad_norm": 0.09228515625, "learning_rate": 0.002755107760678904, "loss": 1.266, "step": 3026 }, { "epoch": 0.26544300103174856, "grad_norm": 0.0888671875, "learning_rate": 0.002754873549560083, "loss": 1.2387, "step": 3027 }, { "epoch": 0.26553069280612307, "grad_norm": 0.064453125, "learning_rate": 0.002754639237679646, "loss": 1.2635, "step": 3028 }, { "epoch": 0.26561838458049764, "grad_norm": 0.0634765625, "learning_rate": 0.0027544048250589623, "loss": 1.2462, "step": 3029 }, { "epoch": 0.26570607635487214, "grad_norm": 0.091796875, "learning_rate": 0.002754170311719409, "loss": 1.2656, "step": 3030 }, { "epoch": 0.2657937681292467, "grad_norm": 0.06982421875, "learning_rate": 0.0027539356976823744, "loss": 1.2193, "step": 3031 }, { "epoch": 0.2658814599036213, "grad_norm": 0.09814453125, "learning_rate": 0.0027537009829692537, "loss": 1.2035, "step": 3032 }, { "epoch": 0.2659691516779958, "grad_norm": 0.08056640625, "learning_rate": 0.0027534661676014527, "loss": 1.2506, "step": 3033 }, { "epoch": 0.26605684345237035, "grad_norm": 0.0947265625, "learning_rate": 0.002753231251600386, "loss": 1.2938, "step": 3034 }, { "epoch": 0.2661445352267449, "grad_norm": 0.0927734375, "learning_rate": 0.0027529962349874767, "loss": 1.2583, "step": 3035 }, { "epoch": 0.2662322270011194, "grad_norm": 0.0869140625, "learning_rate": 0.002752761117784158, "loss": 1.2916, "step": 3036 }, { "epoch": 0.266319918775494, "grad_norm": 0.130859375, "learning_rate": 0.0027525259000118723, "loss": 1.2749, "step": 3037 }, { "epoch": 0.26640761054986856, "grad_norm": 0.0693359375, "learning_rate": 0.002752290581692071, "loss": 1.2119, "step": 3038 }, { "epoch": 0.26649530232424307, "grad_norm": 0.06640625, "learning_rate": 0.002752055162846214, "loss": 1.2304, "step": 3039 }, { "epoch": 0.26658299409861763, "grad_norm": 0.08447265625, "learning_rate": 0.002751819643495771, "loss": 1.2892, "step": 3040 }, { "epoch": 0.26667068587299214, "grad_norm": 0.115234375, "learning_rate": 0.002751584023662221, "loss": 1.1748, "step": 3041 }, { "epoch": 0.2667583776473667, "grad_norm": 0.078125, "learning_rate": 0.0027513483033670513, "loss": 1.2638, "step": 3042 }, { "epoch": 0.26684606942174127, "grad_norm": 0.1279296875, "learning_rate": 0.00275111248263176, "loss": 1.2573, "step": 3043 }, { "epoch": 0.2669337611961158, "grad_norm": 0.07958984375, "learning_rate": 0.0027508765614778527, "loss": 1.21, "step": 3044 }, { "epoch": 0.26702145297049035, "grad_norm": 0.07275390625, "learning_rate": 0.0027506405399268445, "loss": 1.2129, "step": 3045 }, { "epoch": 0.2671091447448649, "grad_norm": 0.072265625, "learning_rate": 0.002750404418000261, "loss": 1.2504, "step": 3046 }, { "epoch": 0.2671968365192394, "grad_norm": 0.09326171875, "learning_rate": 0.002750168195719635, "loss": 1.3592, "step": 3047 }, { "epoch": 0.267284528293614, "grad_norm": 0.07080078125, "learning_rate": 0.0027499318731065096, "loss": 1.239, "step": 3048 }, { "epoch": 0.26737222006798855, "grad_norm": 0.1357421875, "learning_rate": 0.002749695450182437, "loss": 1.2477, "step": 3049 }, { "epoch": 0.26745991184236306, "grad_norm": 0.06396484375, "learning_rate": 0.0027494589269689775, "loss": 1.2372, "step": 3050 }, { "epoch": 0.26754760361673763, "grad_norm": 0.1494140625, "learning_rate": 0.0027492223034877024, "loss": 1.2643, "step": 3051 }, { "epoch": 0.26763529539111214, "grad_norm": 0.05859375, "learning_rate": 0.0027489855797601915, "loss": 1.2295, "step": 3052 }, { "epoch": 0.2677229871654867, "grad_norm": 0.1572265625, "learning_rate": 0.002748748755808032, "loss": 1.2405, "step": 3053 }, { "epoch": 0.26781067893986127, "grad_norm": 0.0673828125, "learning_rate": 0.0027485118316528225, "loss": 1.2448, "step": 3054 }, { "epoch": 0.2678983707142358, "grad_norm": 0.0966796875, "learning_rate": 0.0027482748073161703, "loss": 1.3092, "step": 3055 }, { "epoch": 0.26798606248861034, "grad_norm": 0.068359375, "learning_rate": 0.00274803768281969, "loss": 1.2934, "step": 3056 }, { "epoch": 0.2680737542629849, "grad_norm": 0.09765625, "learning_rate": 0.002747800458185008, "loss": 1.2691, "step": 3057 }, { "epoch": 0.2681614460373594, "grad_norm": 0.0810546875, "learning_rate": 0.0027475631334337586, "loss": 1.2401, "step": 3058 }, { "epoch": 0.268249137811734, "grad_norm": 0.169921875, "learning_rate": 0.0027473257085875837, "loss": 1.2933, "step": 3059 }, { "epoch": 0.26833682958610855, "grad_norm": 0.1845703125, "learning_rate": 0.0027470881836681375, "loss": 1.2444, "step": 3060 }, { "epoch": 0.26842452136048306, "grad_norm": 0.07177734375, "learning_rate": 0.0027468505586970816, "loss": 1.2783, "step": 3061 }, { "epoch": 0.2685122131348576, "grad_norm": 0.2216796875, "learning_rate": 0.002746612833696086, "loss": 1.2755, "step": 3062 }, { "epoch": 0.2685999049092322, "grad_norm": 0.0712890625, "learning_rate": 0.00274637500868683, "loss": 1.2336, "step": 3063 }, { "epoch": 0.2686875966836067, "grad_norm": 0.275390625, "learning_rate": 0.0027461370836910047, "loss": 1.2925, "step": 3064 }, { "epoch": 0.26877528845798127, "grad_norm": 0.1318359375, "learning_rate": 0.002745899058730306, "loss": 1.2401, "step": 3065 }, { "epoch": 0.2688629802323558, "grad_norm": 0.2294921875, "learning_rate": 0.002745660933826443, "loss": 1.2863, "step": 3066 }, { "epoch": 0.26895067200673034, "grad_norm": 0.1318359375, "learning_rate": 0.0027454227090011308, "loss": 1.2955, "step": 3067 }, { "epoch": 0.2690383637811049, "grad_norm": 0.1513671875, "learning_rate": 0.0027451843842760957, "loss": 1.2272, "step": 3068 }, { "epoch": 0.2691260555554794, "grad_norm": 0.06298828125, "learning_rate": 0.0027449459596730714, "loss": 1.3204, "step": 3069 }, { "epoch": 0.269213747329854, "grad_norm": 0.1806640625, "learning_rate": 0.002744707435213803, "loss": 1.2679, "step": 3070 }, { "epoch": 0.26930143910422855, "grad_norm": 0.08837890625, "learning_rate": 0.002744468810920042, "loss": 1.2604, "step": 3071 }, { "epoch": 0.26938913087860306, "grad_norm": 0.2197265625, "learning_rate": 0.0027442300868135506, "loss": 1.2726, "step": 3072 }, { "epoch": 0.2694768226529776, "grad_norm": 0.0732421875, "learning_rate": 0.0027439912629161, "loss": 1.2575, "step": 3073 }, { "epoch": 0.2695645144273522, "grad_norm": 0.1396484375, "learning_rate": 0.002743752339249471, "loss": 1.3022, "step": 3074 }, { "epoch": 0.2696522062017267, "grad_norm": 0.09033203125, "learning_rate": 0.0027435133158354515, "loss": 1.2643, "step": 3075 }, { "epoch": 0.26973989797610126, "grad_norm": 0.1240234375, "learning_rate": 0.0027432741926958406, "loss": 1.2649, "step": 3076 }, { "epoch": 0.26982758975047577, "grad_norm": 0.08544921875, "learning_rate": 0.0027430349698524463, "loss": 1.2011, "step": 3077 }, { "epoch": 0.26991528152485034, "grad_norm": 0.11572265625, "learning_rate": 0.002742795647327083, "loss": 1.3274, "step": 3078 }, { "epoch": 0.2700029732992249, "grad_norm": 0.09521484375, "learning_rate": 0.0027425562251415787, "loss": 1.284, "step": 3079 }, { "epoch": 0.2700906650735994, "grad_norm": 0.07275390625, "learning_rate": 0.002742316703317767, "loss": 1.2464, "step": 3080 }, { "epoch": 0.270178356847974, "grad_norm": 0.09716796875, "learning_rate": 0.0027420770818774916, "loss": 1.2999, "step": 3081 }, { "epoch": 0.27026604862234854, "grad_norm": 0.1318359375, "learning_rate": 0.002741837360842606, "loss": 1.3159, "step": 3082 }, { "epoch": 0.27035374039672305, "grad_norm": 0.06884765625, "learning_rate": 0.002741597540234971, "loss": 1.2994, "step": 3083 }, { "epoch": 0.2704414321710976, "grad_norm": 0.08984375, "learning_rate": 0.0027413576200764583, "loss": 1.2113, "step": 3084 }, { "epoch": 0.2705291239454722, "grad_norm": 0.07958984375, "learning_rate": 0.0027411176003889486, "loss": 1.2892, "step": 3085 }, { "epoch": 0.2706168157198467, "grad_norm": 0.0771484375, "learning_rate": 0.0027408774811943297, "loss": 1.2266, "step": 3086 }, { "epoch": 0.27070450749422126, "grad_norm": 0.061767578125, "learning_rate": 0.0027406372625145012, "loss": 1.26, "step": 3087 }, { "epoch": 0.27079219926859577, "grad_norm": 0.095703125, "learning_rate": 0.0027403969443713693, "loss": 1.2235, "step": 3088 }, { "epoch": 0.27087989104297033, "grad_norm": 0.08203125, "learning_rate": 0.0027401565267868512, "loss": 1.2472, "step": 3089 }, { "epoch": 0.2709675828173449, "grad_norm": 0.07958984375, "learning_rate": 0.002739916009782872, "loss": 1.2661, "step": 3090 }, { "epoch": 0.2710552745917194, "grad_norm": 0.06298828125, "learning_rate": 0.0027396753933813666, "loss": 1.2356, "step": 3091 }, { "epoch": 0.271142966366094, "grad_norm": 0.1943359375, "learning_rate": 0.002739434677604278, "loss": 1.3064, "step": 3092 }, { "epoch": 0.27123065814046854, "grad_norm": 0.09130859375, "learning_rate": 0.002739193862473559, "loss": 1.2286, "step": 3093 }, { "epoch": 0.27131834991484305, "grad_norm": 0.12060546875, "learning_rate": 0.002738952948011172, "loss": 1.2517, "step": 3094 }, { "epoch": 0.2714060416892176, "grad_norm": 0.0791015625, "learning_rate": 0.0027387119342390867, "loss": 1.2252, "step": 3095 }, { "epoch": 0.2714937334635922, "grad_norm": 0.142578125, "learning_rate": 0.002738470821179284, "loss": 1.2018, "step": 3096 }, { "epoch": 0.2715814252379667, "grad_norm": 0.13671875, "learning_rate": 0.002738229608853752, "loss": 1.2778, "step": 3097 }, { "epoch": 0.27166911701234125, "grad_norm": 0.07275390625, "learning_rate": 0.0027379882972844894, "loss": 1.2472, "step": 3098 }, { "epoch": 0.2717568087867158, "grad_norm": 0.09033203125, "learning_rate": 0.002737746886493502, "loss": 1.2114, "step": 3099 }, { "epoch": 0.27184450056109033, "grad_norm": 0.06591796875, "learning_rate": 0.0027375053765028067, "loss": 1.2506, "step": 3100 }, { "epoch": 0.2719321923354649, "grad_norm": 0.0654296875, "learning_rate": 0.0027372637673344284, "loss": 1.2715, "step": 3101 }, { "epoch": 0.2720198841098394, "grad_norm": 0.109375, "learning_rate": 0.0027370220590104016, "loss": 1.2963, "step": 3102 }, { "epoch": 0.27210757588421397, "grad_norm": 0.059326171875, "learning_rate": 0.0027367802515527694, "loss": 1.2123, "step": 3103 }, { "epoch": 0.27219526765858854, "grad_norm": 0.08251953125, "learning_rate": 0.002736538344983583, "loss": 1.2826, "step": 3104 }, { "epoch": 0.27228295943296305, "grad_norm": 0.055908203125, "learning_rate": 0.0027362963393249046, "loss": 1.278, "step": 3105 }, { "epoch": 0.2723706512073376, "grad_norm": 0.061767578125, "learning_rate": 0.002736054234598805, "loss": 1.235, "step": 3106 }, { "epoch": 0.2724583429817122, "grad_norm": 0.0654296875, "learning_rate": 0.0027358120308273626, "loss": 1.2437, "step": 3107 }, { "epoch": 0.2725460347560867, "grad_norm": 0.07666015625, "learning_rate": 0.002735569728032665, "loss": 1.2822, "step": 3108 }, { "epoch": 0.27263372653046125, "grad_norm": 0.06787109375, "learning_rate": 0.0027353273262368115, "loss": 1.3354, "step": 3109 }, { "epoch": 0.2727214183048358, "grad_norm": 0.150390625, "learning_rate": 0.0027350848254619076, "loss": 1.232, "step": 3110 }, { "epoch": 0.2728091100792103, "grad_norm": 0.07958984375, "learning_rate": 0.0027348422257300687, "loss": 1.2138, "step": 3111 }, { "epoch": 0.2728968018535849, "grad_norm": 0.09716796875, "learning_rate": 0.0027345995270634193, "loss": 1.2339, "step": 3112 }, { "epoch": 0.2729844936279594, "grad_norm": 0.06689453125, "learning_rate": 0.0027343567294840928, "loss": 1.2918, "step": 3113 }, { "epoch": 0.27307218540233397, "grad_norm": 0.08642578125, "learning_rate": 0.0027341138330142316, "loss": 1.282, "step": 3114 }, { "epoch": 0.27315987717670853, "grad_norm": 0.05859375, "learning_rate": 0.0027338708376759883, "loss": 1.2415, "step": 3115 }, { "epoch": 0.27324756895108304, "grad_norm": 0.08740234375, "learning_rate": 0.002733627743491522, "loss": 1.2454, "step": 3116 }, { "epoch": 0.2733352607254576, "grad_norm": 0.06884765625, "learning_rate": 0.0027333845504830027, "loss": 1.207, "step": 3117 }, { "epoch": 0.2734229524998322, "grad_norm": 0.11767578125, "learning_rate": 0.002733141258672609, "loss": 1.2635, "step": 3118 }, { "epoch": 0.2735106442742067, "grad_norm": 0.06494140625, "learning_rate": 0.0027328978680825292, "loss": 1.221, "step": 3119 }, { "epoch": 0.27359833604858125, "grad_norm": 0.1044921875, "learning_rate": 0.0027326543787349594, "loss": 1.2978, "step": 3120 }, { "epoch": 0.2736860278229558, "grad_norm": 0.056396484375, "learning_rate": 0.0027324107906521043, "loss": 1.2029, "step": 3121 }, { "epoch": 0.2737737195973303, "grad_norm": 0.08251953125, "learning_rate": 0.00273216710385618, "loss": 1.2681, "step": 3122 }, { "epoch": 0.2738614113717049, "grad_norm": 0.0654296875, "learning_rate": 0.002731923318369409, "loss": 1.2606, "step": 3123 }, { "epoch": 0.2739491031460794, "grad_norm": 0.10009765625, "learning_rate": 0.0027316794342140244, "loss": 1.3328, "step": 3124 }, { "epoch": 0.27403679492045396, "grad_norm": 0.06787109375, "learning_rate": 0.0027314354514122686, "loss": 1.3222, "step": 3125 }, { "epoch": 0.27412448669482853, "grad_norm": 0.0859375, "learning_rate": 0.0027311913699863905, "loss": 1.1962, "step": 3126 }, { "epoch": 0.27421217846920304, "grad_norm": 0.0810546875, "learning_rate": 0.002730947189958651, "loss": 1.2725, "step": 3127 }, { "epoch": 0.2742998702435776, "grad_norm": 0.09130859375, "learning_rate": 0.002730702911351317, "loss": 1.3021, "step": 3128 }, { "epoch": 0.27438756201795217, "grad_norm": 0.10986328125, "learning_rate": 0.0027304585341866687, "loss": 1.2567, "step": 3129 }, { "epoch": 0.2744752537923267, "grad_norm": 0.078125, "learning_rate": 0.0027302140584869903, "loss": 1.2036, "step": 3130 }, { "epoch": 0.27456294556670124, "grad_norm": 0.072265625, "learning_rate": 0.0027299694842745793, "loss": 1.2829, "step": 3131 }, { "epoch": 0.2746506373410758, "grad_norm": 0.068359375, "learning_rate": 0.002729724811571739, "loss": 1.2396, "step": 3132 }, { "epoch": 0.2747383291154503, "grad_norm": 0.06201171875, "learning_rate": 0.002729480040400783, "loss": 1.2634, "step": 3133 }, { "epoch": 0.2748260208898249, "grad_norm": 0.07666015625, "learning_rate": 0.002729235170784033, "loss": 1.2184, "step": 3134 }, { "epoch": 0.2749137126641994, "grad_norm": 0.08642578125, "learning_rate": 0.0027289902027438227, "loss": 1.2164, "step": 3135 }, { "epoch": 0.27500140443857396, "grad_norm": 0.0703125, "learning_rate": 0.0027287451363024917, "loss": 1.1516, "step": 3136 }, { "epoch": 0.2750890962129485, "grad_norm": 0.0654296875, "learning_rate": 0.002728499971482388, "loss": 1.2932, "step": 3137 }, { "epoch": 0.27517678798732303, "grad_norm": 0.083984375, "learning_rate": 0.0027282547083058713, "loss": 1.283, "step": 3138 }, { "epoch": 0.2752644797616976, "grad_norm": 0.056884765625, "learning_rate": 0.0027280093467953086, "loss": 1.2664, "step": 3139 }, { "epoch": 0.27535217153607217, "grad_norm": 0.095703125, "learning_rate": 0.0027277638869730764, "loss": 1.2171, "step": 3140 }, { "epoch": 0.2754398633104467, "grad_norm": 0.0556640625, "learning_rate": 0.0027275183288615603, "loss": 1.2511, "step": 3141 }, { "epoch": 0.27552755508482124, "grad_norm": 0.1396484375, "learning_rate": 0.002727272672483154, "loss": 1.1944, "step": 3142 }, { "epoch": 0.2756152468591958, "grad_norm": 0.1357421875, "learning_rate": 0.0027270269178602605, "loss": 1.1998, "step": 3143 }, { "epoch": 0.2757029386335703, "grad_norm": 0.0966796875, "learning_rate": 0.002726781065015293, "loss": 1.2243, "step": 3144 }, { "epoch": 0.2757906304079449, "grad_norm": 0.28515625, "learning_rate": 0.002726535113970672, "loss": 1.2824, "step": 3145 }, { "epoch": 0.27587832218231945, "grad_norm": 0.12158203125, "learning_rate": 0.0027262890647488276, "loss": 1.2512, "step": 3146 }, { "epoch": 0.27596601395669396, "grad_norm": 0.283203125, "learning_rate": 0.002726042917372199, "loss": 1.2451, "step": 3147 }, { "epoch": 0.2760537057310685, "grad_norm": 0.2119140625, "learning_rate": 0.0027257966718632343, "loss": 1.2356, "step": 3148 }, { "epoch": 0.27614139750544303, "grad_norm": 0.1787109375, "learning_rate": 0.0027255503282443898, "loss": 1.2926, "step": 3149 }, { "epoch": 0.2762290892798176, "grad_norm": 0.1240234375, "learning_rate": 0.0027253038865381316, "loss": 1.2315, "step": 3150 }, { "epoch": 0.27631678105419216, "grad_norm": 0.1982421875, "learning_rate": 0.0027250573467669354, "loss": 1.2505, "step": 3151 }, { "epoch": 0.27640447282856667, "grad_norm": 0.08154296875, "learning_rate": 0.0027248107089532844, "loss": 1.2848, "step": 3152 }, { "epoch": 0.27649216460294124, "grad_norm": 0.1962890625, "learning_rate": 0.002724563973119671, "loss": 1.3143, "step": 3153 }, { "epoch": 0.2765798563773158, "grad_norm": 0.095703125, "learning_rate": 0.0027243171392885976, "loss": 1.2993, "step": 3154 }, { "epoch": 0.2766675481516903, "grad_norm": 0.08642578125, "learning_rate": 0.002724070207482575, "loss": 1.2279, "step": 3155 }, { "epoch": 0.2767552399260649, "grad_norm": 0.08935546875, "learning_rate": 0.002723823177724121, "loss": 1.2013, "step": 3156 }, { "epoch": 0.27684293170043944, "grad_norm": 0.08056640625, "learning_rate": 0.0027235760500357656, "loss": 1.2427, "step": 3157 }, { "epoch": 0.27693062347481395, "grad_norm": 0.0810546875, "learning_rate": 0.002723328824440046, "loss": 1.2356, "step": 3158 }, { "epoch": 0.2770183152491885, "grad_norm": 0.08837890625, "learning_rate": 0.0027230815009595083, "loss": 1.2326, "step": 3159 }, { "epoch": 0.27710600702356303, "grad_norm": 0.0888671875, "learning_rate": 0.002722834079616708, "loss": 1.2416, "step": 3160 }, { "epoch": 0.2771936987979376, "grad_norm": 0.0927734375, "learning_rate": 0.0027225865604342094, "loss": 1.2032, "step": 3161 }, { "epoch": 0.27728139057231216, "grad_norm": 0.06689453125, "learning_rate": 0.0027223389434345847, "loss": 1.2644, "step": 3162 }, { "epoch": 0.27736908234668667, "grad_norm": 0.177734375, "learning_rate": 0.002722091228640417, "loss": 1.251, "step": 3163 }, { "epoch": 0.27745677412106123, "grad_norm": 0.0654296875, "learning_rate": 0.0027218434160742972, "loss": 1.2812, "step": 3164 }, { "epoch": 0.2775444658954358, "grad_norm": 0.12060546875, "learning_rate": 0.0027215955057588244, "loss": 1.2155, "step": 3165 }, { "epoch": 0.2776321576698103, "grad_norm": 0.062255859375, "learning_rate": 0.0027213474977166078, "loss": 1.3365, "step": 3166 }, { "epoch": 0.2777198494441849, "grad_norm": 0.09814453125, "learning_rate": 0.0027210993919702647, "loss": 1.2537, "step": 3167 }, { "epoch": 0.27780754121855944, "grad_norm": 0.11279296875, "learning_rate": 0.002720851188542423, "loss": 1.3081, "step": 3168 }, { "epoch": 0.27789523299293395, "grad_norm": 0.095703125, "learning_rate": 0.0027206028874557165, "loss": 1.1992, "step": 3169 }, { "epoch": 0.2779829247673085, "grad_norm": 0.240234375, "learning_rate": 0.0027203544887327907, "loss": 1.3415, "step": 3170 }, { "epoch": 0.278070616541683, "grad_norm": 0.23046875, "learning_rate": 0.0027201059923962984, "loss": 1.2768, "step": 3171 }, { "epoch": 0.2781583083160576, "grad_norm": 0.13671875, "learning_rate": 0.002719857398468902, "loss": 1.2647, "step": 3172 }, { "epoch": 0.27824600009043216, "grad_norm": 0.125, "learning_rate": 0.0027196087069732734, "loss": 1.2595, "step": 3173 }, { "epoch": 0.27833369186480666, "grad_norm": 0.1923828125, "learning_rate": 0.002719359917932091, "loss": 1.2586, "step": 3174 }, { "epoch": 0.27842138363918123, "grad_norm": 0.10498046875, "learning_rate": 0.002719111031368045, "loss": 1.2823, "step": 3175 }, { "epoch": 0.2785090754135558, "grad_norm": 0.193359375, "learning_rate": 0.002718862047303833, "loss": 1.2538, "step": 3176 }, { "epoch": 0.2785967671879303, "grad_norm": 0.07568359375, "learning_rate": 0.0027186129657621617, "loss": 1.191, "step": 3177 }, { "epoch": 0.27868445896230487, "grad_norm": 0.177734375, "learning_rate": 0.002718363786765746, "loss": 1.2129, "step": 3178 }, { "epoch": 0.27877215073667944, "grad_norm": 0.0693359375, "learning_rate": 0.0027181145103373112, "loss": 1.2534, "step": 3179 }, { "epoch": 0.27885984251105395, "grad_norm": 0.1787109375, "learning_rate": 0.0027178651364995906, "loss": 1.2581, "step": 3180 }, { "epoch": 0.2789475342854285, "grad_norm": 0.11572265625, "learning_rate": 0.0027176156652753258, "loss": 1.2395, "step": 3181 }, { "epoch": 0.2790352260598031, "grad_norm": 0.14453125, "learning_rate": 0.0027173660966872686, "loss": 1.2174, "step": 3182 }, { "epoch": 0.2791229178341776, "grad_norm": 0.1259765625, "learning_rate": 0.002717116430758178, "loss": 1.2845, "step": 3183 }, { "epoch": 0.27921060960855215, "grad_norm": 0.08935546875, "learning_rate": 0.0027168666675108253, "loss": 1.268, "step": 3184 }, { "epoch": 0.27929830138292666, "grad_norm": 0.0830078125, "learning_rate": 0.0027166168069679857, "loss": 1.2, "step": 3185 }, { "epoch": 0.2793859931573012, "grad_norm": 0.0751953125, "learning_rate": 0.0027163668491524463, "loss": 1.247, "step": 3186 }, { "epoch": 0.2794736849316758, "grad_norm": 0.07958984375, "learning_rate": 0.002716116794087004, "loss": 1.2857, "step": 3187 }, { "epoch": 0.2795613767060503, "grad_norm": 0.09228515625, "learning_rate": 0.0027158666417944617, "loss": 1.2934, "step": 3188 }, { "epoch": 0.27964906848042487, "grad_norm": 0.0830078125, "learning_rate": 0.002715616392297633, "loss": 1.2545, "step": 3189 }, { "epoch": 0.27973676025479943, "grad_norm": 0.09619140625, "learning_rate": 0.0027153660456193406, "loss": 1.2687, "step": 3190 }, { "epoch": 0.27982445202917394, "grad_norm": 0.0849609375, "learning_rate": 0.002715115601782415, "loss": 1.2298, "step": 3191 }, { "epoch": 0.2799121438035485, "grad_norm": 0.08154296875, "learning_rate": 0.0027148650608096957, "loss": 1.3194, "step": 3192 }, { "epoch": 0.2799998355779231, "grad_norm": 0.1376953125, "learning_rate": 0.002714614422724032, "loss": 1.2554, "step": 3193 }, { "epoch": 0.2800875273522976, "grad_norm": 0.07568359375, "learning_rate": 0.0027143636875482818, "loss": 1.2185, "step": 3194 }, { "epoch": 0.28017521912667215, "grad_norm": 0.12158203125, "learning_rate": 0.00271411285530531, "loss": 1.1821, "step": 3195 }, { "epoch": 0.28026291090104666, "grad_norm": 0.083984375, "learning_rate": 0.0027138619260179933, "loss": 1.3091, "step": 3196 }, { "epoch": 0.2803506026754212, "grad_norm": 0.15234375, "learning_rate": 0.002713610899709216, "loss": 1.1963, "step": 3197 }, { "epoch": 0.2804382944497958, "grad_norm": 0.0869140625, "learning_rate": 0.0027133597764018693, "loss": 1.2973, "step": 3198 }, { "epoch": 0.2805259862241703, "grad_norm": 0.1357421875, "learning_rate": 0.002713108556118856, "loss": 1.2174, "step": 3199 }, { "epoch": 0.28061367799854486, "grad_norm": 0.06591796875, "learning_rate": 0.002712857238883087, "loss": 1.2177, "step": 3200 }, { "epoch": 0.28070136977291943, "grad_norm": 0.1650390625, "learning_rate": 0.0027126058247174826, "loss": 1.2362, "step": 3201 }, { "epoch": 0.28078906154729394, "grad_norm": 0.08642578125, "learning_rate": 0.0027123543136449687, "loss": 1.2779, "step": 3202 }, { "epoch": 0.2808767533216685, "grad_norm": 0.1318359375, "learning_rate": 0.002712102705688484, "loss": 1.3116, "step": 3203 }, { "epoch": 0.28096444509604307, "grad_norm": 0.0634765625, "learning_rate": 0.002711851000870975, "loss": 1.1808, "step": 3204 }, { "epoch": 0.2810521368704176, "grad_norm": 0.1826171875, "learning_rate": 0.0027115991992153953, "loss": 1.2377, "step": 3205 }, { "epoch": 0.28113982864479214, "grad_norm": 0.1015625, "learning_rate": 0.0027113473007447098, "loss": 1.2306, "step": 3206 }, { "epoch": 0.28122752041916665, "grad_norm": 0.12255859375, "learning_rate": 0.00271109530548189, "loss": 1.2907, "step": 3207 }, { "epoch": 0.2813152121935412, "grad_norm": 0.08984375, "learning_rate": 0.002710843213449917, "loss": 1.2386, "step": 3208 }, { "epoch": 0.2814029039679158, "grad_norm": 0.12451171875, "learning_rate": 0.0027105910246717825, "loss": 1.2273, "step": 3209 }, { "epoch": 0.2814905957422903, "grad_norm": 0.0947265625, "learning_rate": 0.002710338739170484, "loss": 1.2424, "step": 3210 }, { "epoch": 0.28157828751666486, "grad_norm": 0.0673828125, "learning_rate": 0.002710086356969029, "loss": 1.2758, "step": 3211 }, { "epoch": 0.2816659792910394, "grad_norm": 0.07958984375, "learning_rate": 0.002709833878090436, "loss": 1.2095, "step": 3212 }, { "epoch": 0.28175367106541394, "grad_norm": 0.060791015625, "learning_rate": 0.0027095813025577284, "loss": 1.2588, "step": 3213 }, { "epoch": 0.2818413628397885, "grad_norm": 0.0712890625, "learning_rate": 0.002709328630393942, "loss": 1.2729, "step": 3214 }, { "epoch": 0.28192905461416307, "grad_norm": 0.10693359375, "learning_rate": 0.0027090758616221183, "loss": 1.2127, "step": 3215 }, { "epoch": 0.2820167463885376, "grad_norm": 0.07861328125, "learning_rate": 0.0027088229962653108, "loss": 1.2942, "step": 3216 }, { "epoch": 0.28210443816291214, "grad_norm": 0.12158203125, "learning_rate": 0.002708570034346579, "loss": 1.1928, "step": 3217 }, { "epoch": 0.2821921299372867, "grad_norm": 0.07470703125, "learning_rate": 0.002708316975888993, "loss": 1.2079, "step": 3218 }, { "epoch": 0.2822798217116612, "grad_norm": 0.1005859375, "learning_rate": 0.0027080638209156303, "loss": 1.2308, "step": 3219 }, { "epoch": 0.2823675134860358, "grad_norm": 0.07763671875, "learning_rate": 0.0027078105694495792, "loss": 1.1913, "step": 3220 }, { "epoch": 0.2824552052604103, "grad_norm": 0.061279296875, "learning_rate": 0.002707557221513935, "loss": 1.2637, "step": 3221 }, { "epoch": 0.28254289703478486, "grad_norm": 0.07177734375, "learning_rate": 0.0027073037771318015, "loss": 1.2291, "step": 3222 }, { "epoch": 0.2826305888091594, "grad_norm": 0.0634765625, "learning_rate": 0.002707050236326293, "loss": 1.2401, "step": 3223 }, { "epoch": 0.28271828058353393, "grad_norm": 0.06591796875, "learning_rate": 0.0027067965991205323, "loss": 1.302, "step": 3224 }, { "epoch": 0.2828059723579085, "grad_norm": 0.056396484375, "learning_rate": 0.002706542865537649, "loss": 1.2425, "step": 3225 }, { "epoch": 0.28289366413228306, "grad_norm": 0.06640625, "learning_rate": 0.002706289035600785, "loss": 1.2577, "step": 3226 }, { "epoch": 0.2829813559066576, "grad_norm": 0.076171875, "learning_rate": 0.002706035109333087, "loss": 1.3274, "step": 3227 }, { "epoch": 0.28306904768103214, "grad_norm": 0.06982421875, "learning_rate": 0.002705781086757713, "loss": 1.3964, "step": 3228 }, { "epoch": 0.2831567394554067, "grad_norm": 0.103515625, "learning_rate": 0.00270552696789783, "loss": 1.22, "step": 3229 }, { "epoch": 0.2832444312297812, "grad_norm": 0.0615234375, "learning_rate": 0.0027052727527766118, "loss": 1.309, "step": 3230 }, { "epoch": 0.2833321230041558, "grad_norm": 0.1044921875, "learning_rate": 0.002705018441417243, "loss": 1.248, "step": 3231 }, { "epoch": 0.2834198147785303, "grad_norm": 0.05517578125, "learning_rate": 0.0027047640338429156, "loss": 1.2381, "step": 3232 }, { "epoch": 0.28350750655290485, "grad_norm": 0.10107421875, "learning_rate": 0.0027045095300768317, "loss": 1.314, "step": 3233 }, { "epoch": 0.2835951983272794, "grad_norm": 0.0908203125, "learning_rate": 0.0027042549301422004, "loss": 1.3093, "step": 3234 }, { "epoch": 0.28368289010165393, "grad_norm": 0.058349609375, "learning_rate": 0.0027040002340622407, "loss": 1.2236, "step": 3235 }, { "epoch": 0.2837705818760285, "grad_norm": 0.0634765625, "learning_rate": 0.0027037454418601813, "loss": 1.2469, "step": 3236 }, { "epoch": 0.28385827365040306, "grad_norm": 0.0654296875, "learning_rate": 0.0027034905535592573, "loss": 1.2655, "step": 3237 }, { "epoch": 0.28394596542477757, "grad_norm": 0.07568359375, "learning_rate": 0.002703235569182715, "loss": 1.304, "step": 3238 }, { "epoch": 0.28403365719915213, "grad_norm": 0.060546875, "learning_rate": 0.0027029804887538076, "loss": 1.2202, "step": 3239 }, { "epoch": 0.2841213489735267, "grad_norm": 0.0673828125, "learning_rate": 0.0027027253122957973, "loss": 1.2078, "step": 3240 }, { "epoch": 0.2842090407479012, "grad_norm": 0.080078125, "learning_rate": 0.0027024700398319565, "loss": 1.2527, "step": 3241 }, { "epoch": 0.2842967325222758, "grad_norm": 0.0654296875, "learning_rate": 0.002702214671385565, "loss": 1.2678, "step": 3242 }, { "epoch": 0.2843844242966503, "grad_norm": 0.12255859375, "learning_rate": 0.002701959206979912, "loss": 1.2247, "step": 3243 }, { "epoch": 0.28447211607102485, "grad_norm": 0.07275390625, "learning_rate": 0.0027017036466382943, "loss": 1.2471, "step": 3244 }, { "epoch": 0.2845598078453994, "grad_norm": 0.13671875, "learning_rate": 0.002701447990384019, "loss": 1.273, "step": 3245 }, { "epoch": 0.2846474996197739, "grad_norm": 0.07177734375, "learning_rate": 0.0027011922382404016, "loss": 1.2739, "step": 3246 }, { "epoch": 0.2847351913941485, "grad_norm": 0.1474609375, "learning_rate": 0.0027009363902307654, "loss": 1.277, "step": 3247 }, { "epoch": 0.28482288316852306, "grad_norm": 0.0712890625, "learning_rate": 0.002700680446378444, "loss": 1.2527, "step": 3248 }, { "epoch": 0.28491057494289757, "grad_norm": 0.10595703125, "learning_rate": 0.002700424406706778, "loss": 1.2615, "step": 3249 }, { "epoch": 0.28499826671727213, "grad_norm": 0.0810546875, "learning_rate": 0.0027001682712391173, "loss": 1.2147, "step": 3250 }, { "epoch": 0.2850859584916467, "grad_norm": 0.10546875, "learning_rate": 0.0026999120399988216, "loss": 1.2149, "step": 3251 }, { "epoch": 0.2851736502660212, "grad_norm": 0.07177734375, "learning_rate": 0.0026996557130092584, "loss": 1.2551, "step": 3252 }, { "epoch": 0.28526134204039577, "grad_norm": 0.06591796875, "learning_rate": 0.0026993992902938036, "loss": 1.2677, "step": 3253 }, { "epoch": 0.28534903381477034, "grad_norm": 0.0673828125, "learning_rate": 0.0026991427718758426, "loss": 1.267, "step": 3254 }, { "epoch": 0.28543672558914485, "grad_norm": 0.07177734375, "learning_rate": 0.002698886157778769, "loss": 1.227, "step": 3255 }, { "epoch": 0.2855244173635194, "grad_norm": 0.061279296875, "learning_rate": 0.002698629448025986, "loss": 1.1716, "step": 3256 }, { "epoch": 0.2856121091378939, "grad_norm": 0.06494140625, "learning_rate": 0.0026983726426409037, "loss": 1.1917, "step": 3257 }, { "epoch": 0.2856998009122685, "grad_norm": 0.083984375, "learning_rate": 0.002698115741646943, "loss": 1.2197, "step": 3258 }, { "epoch": 0.28578749268664305, "grad_norm": 0.056640625, "learning_rate": 0.0026978587450675324, "loss": 1.2131, "step": 3259 }, { "epoch": 0.28587518446101756, "grad_norm": 0.08740234375, "learning_rate": 0.00269760165292611, "loss": 1.2746, "step": 3260 }, { "epoch": 0.2859628762353921, "grad_norm": 0.10302734375, "learning_rate": 0.00269734446524612, "loss": 1.1914, "step": 3261 }, { "epoch": 0.2860505680097667, "grad_norm": 0.05224609375, "learning_rate": 0.0026970871820510187, "loss": 1.2459, "step": 3262 }, { "epoch": 0.2861382597841412, "grad_norm": 0.07177734375, "learning_rate": 0.00269682980336427, "loss": 1.2451, "step": 3263 }, { "epoch": 0.28622595155851577, "grad_norm": 0.11279296875, "learning_rate": 0.0026965723292093455, "loss": 1.2741, "step": 3264 }, { "epoch": 0.28631364333289033, "grad_norm": 0.1435546875, "learning_rate": 0.002696314759609726, "loss": 1.1765, "step": 3265 }, { "epoch": 0.28640133510726484, "grad_norm": 0.0595703125, "learning_rate": 0.0026960570945889013, "loss": 1.2042, "step": 3266 }, { "epoch": 0.2864890268816394, "grad_norm": 0.12255859375, "learning_rate": 0.0026957993341703702, "loss": 1.2045, "step": 3267 }, { "epoch": 0.2865767186560139, "grad_norm": 0.07177734375, "learning_rate": 0.0026955414783776397, "loss": 1.342, "step": 3268 }, { "epoch": 0.2866644104303885, "grad_norm": 0.1298828125, "learning_rate": 0.002695283527234225, "loss": 1.275, "step": 3269 }, { "epoch": 0.28675210220476305, "grad_norm": 0.0615234375, "learning_rate": 0.0026950254807636517, "loss": 1.2146, "step": 3270 }, { "epoch": 0.28683979397913756, "grad_norm": 0.1337890625, "learning_rate": 0.0026947673389894518, "loss": 1.2608, "step": 3271 }, { "epoch": 0.2869274857535121, "grad_norm": 0.07861328125, "learning_rate": 0.002694509101935168, "loss": 1.1589, "step": 3272 }, { "epoch": 0.2870151775278867, "grad_norm": 0.193359375, "learning_rate": 0.0026942507696243495, "loss": 1.2726, "step": 3273 }, { "epoch": 0.2871028693022612, "grad_norm": 0.21484375, "learning_rate": 0.002693992342080558, "loss": 1.2923, "step": 3274 }, { "epoch": 0.28719056107663576, "grad_norm": 0.08251953125, "learning_rate": 0.002693733819327359, "loss": 1.274, "step": 3275 }, { "epoch": 0.28727825285101033, "grad_norm": 0.11328125, "learning_rate": 0.00269347520138833, "loss": 1.2155, "step": 3276 }, { "epoch": 0.28736594462538484, "grad_norm": 0.10595703125, "learning_rate": 0.0026932164882870566, "loss": 1.259, "step": 3277 }, { "epoch": 0.2874536363997594, "grad_norm": 0.1376953125, "learning_rate": 0.0026929576800471325, "loss": 1.266, "step": 3278 }, { "epoch": 0.2875413281741339, "grad_norm": 0.11279296875, "learning_rate": 0.00269269877669216, "loss": 1.2527, "step": 3279 }, { "epoch": 0.2876290199485085, "grad_norm": 0.07421875, "learning_rate": 0.0026924397782457517, "loss": 1.233, "step": 3280 }, { "epoch": 0.28771671172288305, "grad_norm": 0.09326171875, "learning_rate": 0.002692180684731526, "loss": 1.2826, "step": 3281 }, { "epoch": 0.28780440349725755, "grad_norm": 0.10791015625, "learning_rate": 0.0026919214961731125, "loss": 1.2348, "step": 3282 }, { "epoch": 0.2878920952716321, "grad_norm": 0.10107421875, "learning_rate": 0.002691662212594148, "loss": 1.2722, "step": 3283 }, { "epoch": 0.2879797870460067, "grad_norm": 0.09228515625, "learning_rate": 0.0026914028340182788, "loss": 1.221, "step": 3284 }, { "epoch": 0.2880674788203812, "grad_norm": 0.205078125, "learning_rate": 0.0026911433604691593, "loss": 1.2433, "step": 3285 }, { "epoch": 0.28815517059475576, "grad_norm": 0.11083984375, "learning_rate": 0.0026908837919704534, "loss": 1.246, "step": 3286 }, { "epoch": 0.2882428623691303, "grad_norm": 0.119140625, "learning_rate": 0.002690624128545833, "loss": 1.2593, "step": 3287 }, { "epoch": 0.28833055414350484, "grad_norm": 0.08349609375, "learning_rate": 0.0026903643702189776, "loss": 1.2447, "step": 3288 }, { "epoch": 0.2884182459178794, "grad_norm": 0.06884765625, "learning_rate": 0.0026901045170135782, "loss": 1.2277, "step": 3289 }, { "epoch": 0.28850593769225397, "grad_norm": 0.0712890625, "learning_rate": 0.0026898445689533313, "loss": 1.3009, "step": 3290 }, { "epoch": 0.2885936294666285, "grad_norm": 0.06787109375, "learning_rate": 0.002689584526061944, "loss": 1.2273, "step": 3291 }, { "epoch": 0.28868132124100304, "grad_norm": 0.11767578125, "learning_rate": 0.0026893243883631315, "loss": 1.2788, "step": 3292 }, { "epoch": 0.28876901301537755, "grad_norm": 0.060791015625, "learning_rate": 0.0026890641558806184, "loss": 1.2454, "step": 3293 }, { "epoch": 0.2888567047897521, "grad_norm": 0.080078125, "learning_rate": 0.0026888038286381364, "loss": 1.2481, "step": 3294 }, { "epoch": 0.2889443965641267, "grad_norm": 0.05615234375, "learning_rate": 0.002688543406659427, "loss": 1.2221, "step": 3295 }, { "epoch": 0.2890320883385012, "grad_norm": 0.12109375, "learning_rate": 0.0026882828899682395, "loss": 1.2275, "step": 3296 }, { "epoch": 0.28911978011287576, "grad_norm": 0.115234375, "learning_rate": 0.002688022278588332, "loss": 1.2774, "step": 3297 }, { "epoch": 0.2892074718872503, "grad_norm": 0.062255859375, "learning_rate": 0.0026877615725434735, "loss": 1.2696, "step": 3298 }, { "epoch": 0.28929516366162483, "grad_norm": 0.07421875, "learning_rate": 0.002687500771857439, "loss": 1.2933, "step": 3299 }, { "epoch": 0.2893828554359994, "grad_norm": 0.10400390625, "learning_rate": 0.002687239876554011, "loss": 1.2868, "step": 3300 }, { "epoch": 0.28947054721037396, "grad_norm": 0.09521484375, "learning_rate": 0.002686978886656985, "loss": 1.2629, "step": 3301 }, { "epoch": 0.2895582389847485, "grad_norm": 0.07958984375, "learning_rate": 0.002686717802190161, "loss": 1.2342, "step": 3302 }, { "epoch": 0.28964593075912304, "grad_norm": 0.1064453125, "learning_rate": 0.00268645662317735, "loss": 1.295, "step": 3303 }, { "epoch": 0.28973362253349755, "grad_norm": 0.07080078125, "learning_rate": 0.00268619534964237, "loss": 1.2275, "step": 3304 }, { "epoch": 0.2898213143078721, "grad_norm": 0.11474609375, "learning_rate": 0.00268593398160905, "loss": 1.2433, "step": 3305 }, { "epoch": 0.2899090060822467, "grad_norm": 0.07666015625, "learning_rate": 0.0026856725191012234, "loss": 1.2159, "step": 3306 }, { "epoch": 0.2899966978566212, "grad_norm": 0.11279296875, "learning_rate": 0.0026854109621427384, "loss": 1.2478, "step": 3307 }, { "epoch": 0.29008438963099575, "grad_norm": 0.0849609375, "learning_rate": 0.0026851493107574458, "loss": 1.2661, "step": 3308 }, { "epoch": 0.2901720814053703, "grad_norm": 0.123046875, "learning_rate": 0.002684887564969208, "loss": 1.2754, "step": 3309 }, { "epoch": 0.29025977317974483, "grad_norm": 0.06494140625, "learning_rate": 0.002684625724801896, "loss": 1.2175, "step": 3310 }, { "epoch": 0.2903474649541194, "grad_norm": 0.08349609375, "learning_rate": 0.002684363790279389, "loss": 1.2562, "step": 3311 }, { "epoch": 0.29043515672849396, "grad_norm": 0.062255859375, "learning_rate": 0.0026841017614255744, "loss": 1.2744, "step": 3312 }, { "epoch": 0.29052284850286847, "grad_norm": 0.076171875, "learning_rate": 0.002683839638264349, "loss": 1.2568, "step": 3313 }, { "epoch": 0.29061054027724303, "grad_norm": 0.091796875, "learning_rate": 0.0026835774208196174, "loss": 1.2762, "step": 3314 }, { "epoch": 0.29069823205161754, "grad_norm": 0.05712890625, "learning_rate": 0.0026833151091152927, "loss": 1.2307, "step": 3315 }, { "epoch": 0.2907859238259921, "grad_norm": 0.06103515625, "learning_rate": 0.002683052703175298, "loss": 1.2944, "step": 3316 }, { "epoch": 0.2908736156003667, "grad_norm": 0.0869140625, "learning_rate": 0.0026827902030235637, "loss": 1.2738, "step": 3317 }, { "epoch": 0.2909613073747412, "grad_norm": 0.09619140625, "learning_rate": 0.002682527608684028, "loss": 1.2389, "step": 3318 }, { "epoch": 0.29104899914911575, "grad_norm": 0.0703125, "learning_rate": 0.002682264920180641, "loss": 1.2506, "step": 3319 }, { "epoch": 0.2911366909234903, "grad_norm": 0.056884765625, "learning_rate": 0.0026820021375373575, "loss": 1.2833, "step": 3320 }, { "epoch": 0.2912243826978648, "grad_norm": 0.06005859375, "learning_rate": 0.0026817392607781433, "loss": 1.3007, "step": 3321 }, { "epoch": 0.2913120744722394, "grad_norm": 0.0703125, "learning_rate": 0.002681476289926972, "loss": 1.2501, "step": 3322 }, { "epoch": 0.29139976624661396, "grad_norm": 0.0693359375, "learning_rate": 0.0026812132250078257, "loss": 1.2945, "step": 3323 }, { "epoch": 0.29148745802098847, "grad_norm": 0.059326171875, "learning_rate": 0.0026809500660446954, "loss": 1.2011, "step": 3324 }, { "epoch": 0.29157514979536303, "grad_norm": 0.0712890625, "learning_rate": 0.0026806868130615806, "loss": 1.2703, "step": 3325 }, { "epoch": 0.29166284156973754, "grad_norm": 0.0927734375, "learning_rate": 0.002680423466082489, "loss": 1.2925, "step": 3326 }, { "epoch": 0.2917505333441121, "grad_norm": 0.06494140625, "learning_rate": 0.0026801600251314364, "loss": 1.2185, "step": 3327 }, { "epoch": 0.29183822511848667, "grad_norm": 0.111328125, "learning_rate": 0.00267989649023245, "loss": 1.3242, "step": 3328 }, { "epoch": 0.2919259168928612, "grad_norm": 0.076171875, "learning_rate": 0.0026796328614095625, "loss": 1.2339, "step": 3329 }, { "epoch": 0.29201360866723575, "grad_norm": 0.072265625, "learning_rate": 0.002679369138686816, "loss": 1.2168, "step": 3330 }, { "epoch": 0.2921013004416103, "grad_norm": 0.078125, "learning_rate": 0.0026791053220882603, "loss": 1.2098, "step": 3331 }, { "epoch": 0.2921889922159848, "grad_norm": 0.06884765625, "learning_rate": 0.0026788414116379567, "loss": 1.2854, "step": 3332 }, { "epoch": 0.2922766839903594, "grad_norm": 0.08544921875, "learning_rate": 0.002678577407359972, "loss": 1.2881, "step": 3333 }, { "epoch": 0.29236437576473395, "grad_norm": 0.05908203125, "learning_rate": 0.0026783133092783836, "loss": 1.2091, "step": 3334 }, { "epoch": 0.29245206753910846, "grad_norm": 0.10791015625, "learning_rate": 0.002678049117417275, "loss": 1.3324, "step": 3335 }, { "epoch": 0.292539759313483, "grad_norm": 0.158203125, "learning_rate": 0.0026777848318007415, "loss": 1.2052, "step": 3336 }, { "epoch": 0.2926274510878576, "grad_norm": 0.064453125, "learning_rate": 0.0026775204524528843, "loss": 1.2421, "step": 3337 }, { "epoch": 0.2927151428622321, "grad_norm": 0.052734375, "learning_rate": 0.0026772559793978145, "loss": 1.2306, "step": 3338 }, { "epoch": 0.29280283463660667, "grad_norm": 0.07421875, "learning_rate": 0.002676991412659651, "loss": 1.2014, "step": 3339 }, { "epoch": 0.2928905264109812, "grad_norm": 0.0703125, "learning_rate": 0.0026767267522625217, "loss": 1.191, "step": 3340 }, { "epoch": 0.29297821818535574, "grad_norm": 0.06884765625, "learning_rate": 0.0026764619982305635, "loss": 1.3147, "step": 3341 }, { "epoch": 0.2930659099597303, "grad_norm": 0.058837890625, "learning_rate": 0.0026761971505879206, "loss": 1.2417, "step": 3342 }, { "epoch": 0.2931536017341048, "grad_norm": 0.054443359375, "learning_rate": 0.002675932209358747, "loss": 1.3209, "step": 3343 }, { "epoch": 0.2932412935084794, "grad_norm": 0.0927734375, "learning_rate": 0.002675667174567204, "loss": 1.2413, "step": 3344 }, { "epoch": 0.29332898528285395, "grad_norm": 0.09228515625, "learning_rate": 0.0026754020462374625, "loss": 1.2456, "step": 3345 }, { "epoch": 0.29341667705722846, "grad_norm": 0.11083984375, "learning_rate": 0.0026751368243937016, "loss": 1.214, "step": 3346 }, { "epoch": 0.293504368831603, "grad_norm": 0.11669921875, "learning_rate": 0.0026748715090601084, "loss": 1.2711, "step": 3347 }, { "epoch": 0.2935920606059776, "grad_norm": 0.09130859375, "learning_rate": 0.00267460610026088, "loss": 1.2349, "step": 3348 }, { "epoch": 0.2936797523803521, "grad_norm": 0.078125, "learning_rate": 0.0026743405980202196, "loss": 1.2649, "step": 3349 }, { "epoch": 0.29376744415472666, "grad_norm": 0.0673828125, "learning_rate": 0.002674075002362341, "loss": 1.3246, "step": 3350 }, { "epoch": 0.2938551359291012, "grad_norm": 0.056884765625, "learning_rate": 0.002673809313311466, "loss": 1.2454, "step": 3351 }, { "epoch": 0.29394282770347574, "grad_norm": 0.0947265625, "learning_rate": 0.002673543530891824, "loss": 1.2029, "step": 3352 }, { "epoch": 0.2940305194778503, "grad_norm": 0.054443359375, "learning_rate": 0.0026732776551276548, "loss": 1.2133, "step": 3353 }, { "epoch": 0.2941182112522248, "grad_norm": 0.1201171875, "learning_rate": 0.0026730116860432048, "loss": 1.3307, "step": 3354 }, { "epoch": 0.2942059030265994, "grad_norm": 0.08642578125, "learning_rate": 0.00267274562366273, "loss": 1.2216, "step": 3355 }, { "epoch": 0.29429359480097395, "grad_norm": 0.061279296875, "learning_rate": 0.002672479468010495, "loss": 1.2005, "step": 3356 }, { "epoch": 0.29438128657534846, "grad_norm": 0.09326171875, "learning_rate": 0.0026722132191107717, "loss": 1.2677, "step": 3357 }, { "epoch": 0.294468978349723, "grad_norm": 0.14453125, "learning_rate": 0.0026719468769878414, "loss": 1.1741, "step": 3358 }, { "epoch": 0.2945566701240976, "grad_norm": 0.0703125, "learning_rate": 0.002671680441665994, "loss": 1.2545, "step": 3359 }, { "epoch": 0.2946443618984721, "grad_norm": 0.2060546875, "learning_rate": 0.002671413913169528, "loss": 1.2385, "step": 3360 }, { "epoch": 0.29473205367284666, "grad_norm": 0.068359375, "learning_rate": 0.00267114729152275, "loss": 1.3083, "step": 3361 }, { "epoch": 0.29481974544722117, "grad_norm": 0.26953125, "learning_rate": 0.0026708805767499755, "loss": 1.2516, "step": 3362 }, { "epoch": 0.29490743722159574, "grad_norm": 0.07177734375, "learning_rate": 0.0026706137688755267, "loss": 1.2601, "step": 3363 }, { "epoch": 0.2949951289959703, "grad_norm": 0.251953125, "learning_rate": 0.0026703468679237377, "loss": 1.2893, "step": 3364 }, { "epoch": 0.2950828207703448, "grad_norm": 0.0888671875, "learning_rate": 0.002670079873918948, "loss": 1.2453, "step": 3365 }, { "epoch": 0.2951705125447194, "grad_norm": 0.115234375, "learning_rate": 0.0026698127868855074, "loss": 1.1641, "step": 3366 }, { "epoch": 0.29525820431909394, "grad_norm": 0.12890625, "learning_rate": 0.002669545606847773, "loss": 1.2329, "step": 3367 }, { "epoch": 0.29534589609346845, "grad_norm": 0.1083984375, "learning_rate": 0.0026692783338301117, "loss": 1.2091, "step": 3368 }, { "epoch": 0.295433587867843, "grad_norm": 0.1337890625, "learning_rate": 0.002669010967856897, "loss": 1.2099, "step": 3369 }, { "epoch": 0.2955212796422176, "grad_norm": 0.06494140625, "learning_rate": 0.002668743508952513, "loss": 1.264, "step": 3370 }, { "epoch": 0.2956089714165921, "grad_norm": 0.2041015625, "learning_rate": 0.00266847595714135, "loss": 1.2871, "step": 3371 }, { "epoch": 0.29569666319096666, "grad_norm": 0.07568359375, "learning_rate": 0.00266820831244781, "loss": 1.2587, "step": 3372 }, { "epoch": 0.2957843549653412, "grad_norm": 0.1728515625, "learning_rate": 0.0026679405748963, "loss": 1.2202, "step": 3373 }, { "epoch": 0.29587204673971573, "grad_norm": 0.07275390625, "learning_rate": 0.0026676727445112372, "loss": 1.3136, "step": 3374 }, { "epoch": 0.2959597385140903, "grad_norm": 0.130859375, "learning_rate": 0.0026674048213170473, "loss": 1.2218, "step": 3375 }, { "epoch": 0.2960474302884648, "grad_norm": 0.0966796875, "learning_rate": 0.0026671368053381643, "loss": 1.2736, "step": 3376 }, { "epoch": 0.2961351220628394, "grad_norm": 0.1123046875, "learning_rate": 0.0026668686965990304, "loss": 1.2682, "step": 3377 }, { "epoch": 0.29622281383721394, "grad_norm": 0.06689453125, "learning_rate": 0.002666600495124096, "loss": 1.2485, "step": 3378 }, { "epoch": 0.29631050561158845, "grad_norm": 0.06689453125, "learning_rate": 0.002666332200937821, "loss": 1.2082, "step": 3379 }, { "epoch": 0.296398197385963, "grad_norm": 0.07470703125, "learning_rate": 0.002666063814064673, "loss": 1.2185, "step": 3380 }, { "epoch": 0.2964858891603376, "grad_norm": 0.06689453125, "learning_rate": 0.0026657953345291282, "loss": 1.2966, "step": 3381 }, { "epoch": 0.2965735809347121, "grad_norm": 0.064453125, "learning_rate": 0.0026655267623556713, "loss": 1.2046, "step": 3382 }, { "epoch": 0.29666127270908665, "grad_norm": 0.068359375, "learning_rate": 0.002665258097568795, "loss": 1.2701, "step": 3383 }, { "epoch": 0.2967489644834612, "grad_norm": 0.0654296875, "learning_rate": 0.002664989340193001, "loss": 1.2523, "step": 3384 }, { "epoch": 0.29683665625783573, "grad_norm": 0.0849609375, "learning_rate": 0.002664720490252799, "loss": 1.3207, "step": 3385 }, { "epoch": 0.2969243480322103, "grad_norm": 0.072265625, "learning_rate": 0.0026644515477727082, "loss": 1.2201, "step": 3386 }, { "epoch": 0.2970120398065848, "grad_norm": 0.0693359375, "learning_rate": 0.002664182512777255, "loss": 1.2163, "step": 3387 }, { "epoch": 0.29709973158095937, "grad_norm": 0.0712890625, "learning_rate": 0.002663913385290975, "loss": 1.2385, "step": 3388 }, { "epoch": 0.29718742335533394, "grad_norm": 0.1728515625, "learning_rate": 0.002663644165338411, "loss": 1.2439, "step": 3389 }, { "epoch": 0.29727511512970844, "grad_norm": 0.11083984375, "learning_rate": 0.0026633748529441164, "loss": 1.2949, "step": 3390 }, { "epoch": 0.297362806904083, "grad_norm": 0.11669921875, "learning_rate": 0.002663105448132651, "loss": 1.2609, "step": 3391 }, { "epoch": 0.2974504986784576, "grad_norm": 0.0966796875, "learning_rate": 0.0026628359509285843, "loss": 1.2165, "step": 3392 }, { "epoch": 0.2975381904528321, "grad_norm": 0.1171875, "learning_rate": 0.0026625663613564932, "loss": 1.2357, "step": 3393 }, { "epoch": 0.29762588222720665, "grad_norm": 0.0908203125, "learning_rate": 0.0026622966794409638, "loss": 1.2594, "step": 3394 }, { "epoch": 0.2977135740015812, "grad_norm": 0.06982421875, "learning_rate": 0.0026620269052065907, "loss": 1.2195, "step": 3395 }, { "epoch": 0.2978012657759557, "grad_norm": 0.060302734375, "learning_rate": 0.0026617570386779765, "loss": 1.2352, "step": 3396 }, { "epoch": 0.2978889575503303, "grad_norm": 0.06787109375, "learning_rate": 0.002661487079879732, "loss": 1.2886, "step": 3397 }, { "epoch": 0.2979766493247048, "grad_norm": 0.05908203125, "learning_rate": 0.002661217028836477, "loss": 1.2258, "step": 3398 }, { "epoch": 0.29806434109907937, "grad_norm": 0.06298828125, "learning_rate": 0.0026609468855728394, "loss": 1.2711, "step": 3399 }, { "epoch": 0.29815203287345393, "grad_norm": 0.080078125, "learning_rate": 0.0026606766501134555, "loss": 1.146, "step": 3400 }, { "epoch": 0.29823972464782844, "grad_norm": 0.0888671875, "learning_rate": 0.00266040632248297, "loss": 1.2412, "step": 3401 }, { "epoch": 0.298327416422203, "grad_norm": 0.1337890625, "learning_rate": 0.0026601359027060363, "loss": 1.2674, "step": 3402 }, { "epoch": 0.29841510819657757, "grad_norm": 0.107421875, "learning_rate": 0.0026598653908073166, "loss": 1.2566, "step": 3403 }, { "epoch": 0.2985027999709521, "grad_norm": 0.1357421875, "learning_rate": 0.0026595947868114796, "loss": 1.2135, "step": 3404 }, { "epoch": 0.29859049174532665, "grad_norm": 0.09326171875, "learning_rate": 0.0026593240907432044, "loss": 1.2651, "step": 3405 }, { "epoch": 0.2986781835197012, "grad_norm": 0.06201171875, "learning_rate": 0.002659053302627178, "loss": 1.1689, "step": 3406 }, { "epoch": 0.2987658752940757, "grad_norm": 0.10595703125, "learning_rate": 0.0026587824224880946, "loss": 1.2475, "step": 3407 }, { "epoch": 0.2988535670684503, "grad_norm": 0.06494140625, "learning_rate": 0.002658511450350659, "loss": 1.325, "step": 3408 }, { "epoch": 0.29894125884282485, "grad_norm": 0.0751953125, "learning_rate": 0.0026582403862395825, "loss": 1.3258, "step": 3409 }, { "epoch": 0.29902895061719936, "grad_norm": 0.054931640625, "learning_rate": 0.0026579692301795855, "loss": 1.2492, "step": 3410 }, { "epoch": 0.29911664239157393, "grad_norm": 0.0673828125, "learning_rate": 0.0026576979821953963, "loss": 1.267, "step": 3411 }, { "epoch": 0.29920433416594844, "grad_norm": 0.091796875, "learning_rate": 0.0026574266423117525, "loss": 1.2172, "step": 3412 }, { "epoch": 0.299292025940323, "grad_norm": 0.061279296875, "learning_rate": 0.0026571552105534003, "loss": 1.2662, "step": 3413 }, { "epoch": 0.29937971771469757, "grad_norm": 0.08154296875, "learning_rate": 0.0026568836869450925, "loss": 1.2658, "step": 3414 }, { "epoch": 0.2994674094890721, "grad_norm": 0.06396484375, "learning_rate": 0.0026566120715115914, "loss": 1.2781, "step": 3415 }, { "epoch": 0.29955510126344664, "grad_norm": 0.07763671875, "learning_rate": 0.0026563403642776684, "loss": 1.2332, "step": 3416 }, { "epoch": 0.2996427930378212, "grad_norm": 0.062255859375, "learning_rate": 0.002656068565268102, "loss": 1.2331, "step": 3417 }, { "epoch": 0.2997304848121957, "grad_norm": 0.1103515625, "learning_rate": 0.002655796674507679, "loss": 1.2997, "step": 3418 }, { "epoch": 0.2998181765865703, "grad_norm": 0.11376953125, "learning_rate": 0.0026555246920211962, "loss": 1.308, "step": 3419 }, { "epoch": 0.29990586836094485, "grad_norm": 0.0703125, "learning_rate": 0.0026552526178334574, "loss": 1.3334, "step": 3420 }, { "epoch": 0.29999356013531936, "grad_norm": 0.0849609375, "learning_rate": 0.0026549804519692744, "loss": 1.2542, "step": 3421 }, { "epoch": 0.3000812519096939, "grad_norm": 0.058349609375, "learning_rate": 0.0026547081944534694, "loss": 1.3005, "step": 3422 }, { "epoch": 0.30016894368406843, "grad_norm": 0.0712890625, "learning_rate": 0.0026544358453108705, "loss": 1.1625, "step": 3423 }, { "epoch": 0.300256635458443, "grad_norm": 0.0634765625, "learning_rate": 0.0026541634045663147, "loss": 1.2191, "step": 3424 }, { "epoch": 0.30034432723281757, "grad_norm": 0.07666015625, "learning_rate": 0.0026538908722446492, "loss": 1.283, "step": 3425 }, { "epoch": 0.3004320190071921, "grad_norm": 0.06591796875, "learning_rate": 0.0026536182483707276, "loss": 1.2994, "step": 3426 }, { "epoch": 0.30051971078156664, "grad_norm": 0.07373046875, "learning_rate": 0.002653345532969412, "loss": 1.2045, "step": 3427 }, { "epoch": 0.3006074025559412, "grad_norm": 0.0634765625, "learning_rate": 0.0026530727260655747, "loss": 1.3135, "step": 3428 }, { "epoch": 0.3006950943303157, "grad_norm": 0.07177734375, "learning_rate": 0.002652799827684094, "loss": 1.2359, "step": 3429 }, { "epoch": 0.3007827861046903, "grad_norm": 0.0849609375, "learning_rate": 0.0026525268378498577, "loss": 1.2409, "step": 3430 }, { "epoch": 0.30087047787906485, "grad_norm": 0.06884765625, "learning_rate": 0.0026522537565877614, "loss": 1.2384, "step": 3431 }, { "epoch": 0.30095816965343936, "grad_norm": 0.08349609375, "learning_rate": 0.00265198058392271, "loss": 1.3228, "step": 3432 }, { "epoch": 0.3010458614278139, "grad_norm": 0.09326171875, "learning_rate": 0.0026517073198796164, "loss": 1.2428, "step": 3433 }, { "epoch": 0.30113355320218843, "grad_norm": 0.0869140625, "learning_rate": 0.002651433964483401, "loss": 1.2697, "step": 3434 }, { "epoch": 0.301221244976563, "grad_norm": 0.11328125, "learning_rate": 0.0026511605177589933, "loss": 1.1963, "step": 3435 }, { "epoch": 0.30130893675093756, "grad_norm": 0.10888671875, "learning_rate": 0.0026508869797313302, "loss": 1.2717, "step": 3436 }, { "epoch": 0.30139662852531207, "grad_norm": 0.11669921875, "learning_rate": 0.002650613350425359, "loss": 1.2591, "step": 3437 }, { "epoch": 0.30148432029968664, "grad_norm": 0.0869140625, "learning_rate": 0.002650339629866033, "loss": 1.2612, "step": 3438 }, { "epoch": 0.3015720120740612, "grad_norm": 0.07275390625, "learning_rate": 0.0026500658180783152, "loss": 1.3079, "step": 3439 }, { "epoch": 0.3016597038484357, "grad_norm": 0.09228515625, "learning_rate": 0.002649791915087176, "loss": 1.2756, "step": 3440 }, { "epoch": 0.3017473956228103, "grad_norm": 0.052978515625, "learning_rate": 0.0026495179209175958, "loss": 1.1832, "step": 3441 }, { "epoch": 0.30183508739718484, "grad_norm": 0.07861328125, "learning_rate": 0.0026492438355945613, "loss": 1.2552, "step": 3442 }, { "epoch": 0.30192277917155935, "grad_norm": 0.0546875, "learning_rate": 0.0026489696591430684, "loss": 1.2021, "step": 3443 }, { "epoch": 0.3020104709459339, "grad_norm": 0.09814453125, "learning_rate": 0.0026486953915881213, "loss": 1.2431, "step": 3444 }, { "epoch": 0.3020981627203085, "grad_norm": 0.06640625, "learning_rate": 0.0026484210329547325, "loss": 1.2315, "step": 3445 }, { "epoch": 0.302185854494683, "grad_norm": 0.061279296875, "learning_rate": 0.002648146583267923, "loss": 1.2433, "step": 3446 }, { "epoch": 0.30227354626905756, "grad_norm": 0.07666015625, "learning_rate": 0.002647872042552722, "loss": 1.2234, "step": 3447 }, { "epoch": 0.30236123804343207, "grad_norm": 0.068359375, "learning_rate": 0.0026475974108341664, "loss": 1.2378, "step": 3448 }, { "epoch": 0.30244892981780663, "grad_norm": 0.111328125, "learning_rate": 0.002647322688137303, "loss": 1.2849, "step": 3449 }, { "epoch": 0.3025366215921812, "grad_norm": 0.056640625, "learning_rate": 0.002647047874487184, "loss": 1.2414, "step": 3450 }, { "epoch": 0.3026243133665557, "grad_norm": 0.07861328125, "learning_rate": 0.0026467729699088736, "loss": 1.2915, "step": 3451 }, { "epoch": 0.3027120051409303, "grad_norm": 0.061279296875, "learning_rate": 0.002646497974427441, "loss": 1.2076, "step": 3452 }, { "epoch": 0.30279969691530484, "grad_norm": 0.07080078125, "learning_rate": 0.0026462228880679657, "loss": 1.2645, "step": 3453 }, { "epoch": 0.30288738868967935, "grad_norm": 0.13671875, "learning_rate": 0.0026459477108555346, "loss": 1.3133, "step": 3454 }, { "epoch": 0.3029750804640539, "grad_norm": 0.10498046875, "learning_rate": 0.0026456724428152436, "loss": 1.1937, "step": 3455 }, { "epoch": 0.3030627722384285, "grad_norm": 0.06591796875, "learning_rate": 0.002645397083972196, "loss": 1.2906, "step": 3456 }, { "epoch": 0.303150464012803, "grad_norm": 0.07080078125, "learning_rate": 0.002645121634351504, "loss": 1.2229, "step": 3457 }, { "epoch": 0.30323815578717755, "grad_norm": 0.0576171875, "learning_rate": 0.002644846093978287, "loss": 1.2788, "step": 3458 }, { "epoch": 0.30332584756155206, "grad_norm": 0.107421875, "learning_rate": 0.0026445704628776756, "loss": 1.2494, "step": 3459 }, { "epoch": 0.30341353933592663, "grad_norm": 0.08154296875, "learning_rate": 0.002644294741074805, "loss": 1.2429, "step": 3460 }, { "epoch": 0.3035012311103012, "grad_norm": 0.0791015625, "learning_rate": 0.002644018928594821, "loss": 1.2017, "step": 3461 }, { "epoch": 0.3035889228846757, "grad_norm": 0.06494140625, "learning_rate": 0.002643743025462876, "loss": 1.1755, "step": 3462 }, { "epoch": 0.30367661465905027, "grad_norm": 0.053466796875, "learning_rate": 0.002643467031704133, "loss": 1.2207, "step": 3463 }, { "epoch": 0.30376430643342484, "grad_norm": 0.060546875, "learning_rate": 0.0026431909473437613, "loss": 1.2552, "step": 3464 }, { "epoch": 0.30385199820779935, "grad_norm": 0.0654296875, "learning_rate": 0.0026429147724069393, "loss": 1.2578, "step": 3465 }, { "epoch": 0.3039396899821739, "grad_norm": 0.0546875, "learning_rate": 0.0026426385069188525, "loss": 1.2522, "step": 3466 }, { "epoch": 0.3040273817565485, "grad_norm": 0.08544921875, "learning_rate": 0.0026423621509046973, "loss": 1.2501, "step": 3467 }, { "epoch": 0.304115073530923, "grad_norm": 0.062255859375, "learning_rate": 0.0026420857043896752, "loss": 1.1917, "step": 3468 }, { "epoch": 0.30420276530529755, "grad_norm": 0.08056640625, "learning_rate": 0.002641809167398998, "loss": 1.2403, "step": 3469 }, { "epoch": 0.30429045707967206, "grad_norm": 0.064453125, "learning_rate": 0.002641532539957885, "loss": 1.2041, "step": 3470 }, { "epoch": 0.3043781488540466, "grad_norm": 0.06982421875, "learning_rate": 0.0026412558220915644, "loss": 1.2285, "step": 3471 }, { "epoch": 0.3044658406284212, "grad_norm": 0.05810546875, "learning_rate": 0.0026409790138252716, "loss": 1.3116, "step": 3472 }, { "epoch": 0.3045535324027957, "grad_norm": 0.173828125, "learning_rate": 0.0026407021151842506, "loss": 1.3356, "step": 3473 }, { "epoch": 0.30464122417717027, "grad_norm": 0.1689453125, "learning_rate": 0.002640425126193754, "loss": 1.2862, "step": 3474 }, { "epoch": 0.30472891595154483, "grad_norm": 0.1259765625, "learning_rate": 0.002640148046879043, "loss": 1.251, "step": 3475 }, { "epoch": 0.30481660772591934, "grad_norm": 0.1630859375, "learning_rate": 0.002639870877265386, "loss": 1.2721, "step": 3476 }, { "epoch": 0.3049042995002939, "grad_norm": 0.091796875, "learning_rate": 0.0026395936173780605, "loss": 1.2379, "step": 3477 }, { "epoch": 0.3049919912746685, "grad_norm": 0.1474609375, "learning_rate": 0.0026393162672423516, "loss": 1.2529, "step": 3478 }, { "epoch": 0.305079683049043, "grad_norm": 0.0615234375, "learning_rate": 0.002639038826883553, "loss": 1.2368, "step": 3479 }, { "epoch": 0.30516737482341755, "grad_norm": 0.07080078125, "learning_rate": 0.002638761296326967, "loss": 1.2901, "step": 3480 }, { "epoch": 0.3052550665977921, "grad_norm": 0.058837890625, "learning_rate": 0.0026384836755979036, "loss": 1.2721, "step": 3481 }, { "epoch": 0.3053427583721666, "grad_norm": 0.06494140625, "learning_rate": 0.00263820596472168, "loss": 1.2634, "step": 3482 }, { "epoch": 0.3054304501465412, "grad_norm": 0.059814453125, "learning_rate": 0.002637928163723623, "loss": 1.2373, "step": 3483 }, { "epoch": 0.3055181419209157, "grad_norm": 0.06396484375, "learning_rate": 0.0026376502726290683, "loss": 1.3242, "step": 3484 }, { "epoch": 0.30560583369529026, "grad_norm": 0.146484375, "learning_rate": 0.0026373722914633585, "loss": 1.2837, "step": 3485 }, { "epoch": 0.30569352546966483, "grad_norm": 0.055908203125, "learning_rate": 0.0026370942202518445, "loss": 1.226, "step": 3486 }, { "epoch": 0.30578121724403934, "grad_norm": 0.1884765625, "learning_rate": 0.002636816059019886, "loss": 1.2785, "step": 3487 }, { "epoch": 0.3058689090184139, "grad_norm": 0.09423828125, "learning_rate": 0.00263653780779285, "loss": 1.2589, "step": 3488 }, { "epoch": 0.30595660079278847, "grad_norm": 0.171875, "learning_rate": 0.002636259466596113, "loss": 1.2734, "step": 3489 }, { "epoch": 0.306044292567163, "grad_norm": 0.1796875, "learning_rate": 0.0026359810354550585, "loss": 1.223, "step": 3490 }, { "epoch": 0.30613198434153754, "grad_norm": 0.1416015625, "learning_rate": 0.002635702514395079, "loss": 1.242, "step": 3491 }, { "epoch": 0.3062196761159121, "grad_norm": 0.2255859375, "learning_rate": 0.002635423903441574, "loss": 1.2676, "step": 3492 }, { "epoch": 0.3063073678902866, "grad_norm": 0.07373046875, "learning_rate": 0.002635145202619954, "loss": 1.1832, "step": 3493 }, { "epoch": 0.3063950596646612, "grad_norm": 0.1201171875, "learning_rate": 0.0026348664119556337, "loss": 1.2403, "step": 3494 }, { "epoch": 0.3064827514390357, "grad_norm": 0.12060546875, "learning_rate": 0.00263458753147404, "loss": 1.2787, "step": 3495 }, { "epoch": 0.30657044321341026, "grad_norm": 0.171875, "learning_rate": 0.0026343085612006044, "loss": 1.2931, "step": 3496 }, { "epoch": 0.3066581349877848, "grad_norm": 0.0869140625, "learning_rate": 0.0026340295011607695, "loss": 1.2858, "step": 3497 }, { "epoch": 0.30674582676215933, "grad_norm": 0.1552734375, "learning_rate": 0.0026337503513799844, "loss": 1.2782, "step": 3498 }, { "epoch": 0.3068335185365339, "grad_norm": 0.109375, "learning_rate": 0.0026334711118837055, "loss": 1.2819, "step": 3499 }, { "epoch": 0.30692121031090847, "grad_norm": 0.13671875, "learning_rate": 0.0026331917826974013, "loss": 1.2416, "step": 3500 }, { "epoch": 0.30692121031090847, "eval_loss": 1.26096510887146, "eval_runtime": 429.3945, "eval_samples_per_second": 33.645, "eval_steps_per_second": 8.412, "step": 3500 }, { "epoch": 0.307008902085283, "grad_norm": 0.13671875, "learning_rate": 0.0026329123638465443, "loss": 1.2595, "step": 3501 }, { "epoch": 0.30709659385965754, "grad_norm": 0.1767578125, "learning_rate": 0.0026326328553566173, "loss": 1.2682, "step": 3502 }, { "epoch": 0.3071842856340321, "grad_norm": 0.1044921875, "learning_rate": 0.0026323532572531103, "loss": 1.2691, "step": 3503 }, { "epoch": 0.3072719774084066, "grad_norm": 0.150390625, "learning_rate": 0.0026320735695615225, "loss": 1.2959, "step": 3504 }, { "epoch": 0.3073596691827812, "grad_norm": 0.0908203125, "learning_rate": 0.00263179379230736, "loss": 1.2174, "step": 3505 }, { "epoch": 0.3074473609571557, "grad_norm": 0.08935546875, "learning_rate": 0.002631513925516138, "loss": 1.2131, "step": 3506 }, { "epoch": 0.30753505273153026, "grad_norm": 0.099609375, "learning_rate": 0.00263123396921338, "loss": 1.1935, "step": 3507 }, { "epoch": 0.3076227445059048, "grad_norm": 0.07373046875, "learning_rate": 0.002630953923424617, "loss": 1.2575, "step": 3508 }, { "epoch": 0.30771043628027933, "grad_norm": 0.1591796875, "learning_rate": 0.0026306737881753887, "loss": 1.2996, "step": 3509 }, { "epoch": 0.3077981280546539, "grad_norm": 0.055419921875, "learning_rate": 0.0026303935634912423, "loss": 1.2054, "step": 3510 }, { "epoch": 0.30788581982902846, "grad_norm": 0.0986328125, "learning_rate": 0.0026301132493977333, "loss": 1.269, "step": 3511 }, { "epoch": 0.30797351160340297, "grad_norm": 0.07080078125, "learning_rate": 0.0026298328459204265, "loss": 1.2294, "step": 3512 }, { "epoch": 0.30806120337777754, "grad_norm": 0.11669921875, "learning_rate": 0.002629552353084894, "loss": 1.2675, "step": 3513 }, { "epoch": 0.3081488951521521, "grad_norm": 0.0849609375, "learning_rate": 0.0026292717709167153, "loss": 1.2935, "step": 3514 }, { "epoch": 0.3082365869265266, "grad_norm": 0.1015625, "learning_rate": 0.002628991099441479, "loss": 1.2546, "step": 3515 }, { "epoch": 0.3083242787009012, "grad_norm": 0.09130859375, "learning_rate": 0.002628710338684782, "loss": 1.2042, "step": 3516 }, { "epoch": 0.30841197047527574, "grad_norm": 0.10595703125, "learning_rate": 0.002628429488672228, "loss": 1.2703, "step": 3517 }, { "epoch": 0.30849966224965025, "grad_norm": 0.09375, "learning_rate": 0.0026281485494294306, "loss": 1.1922, "step": 3518 }, { "epoch": 0.3085873540240248, "grad_norm": 0.0830078125, "learning_rate": 0.002627867520982011, "loss": 1.2481, "step": 3519 }, { "epoch": 0.30867504579839933, "grad_norm": 0.06494140625, "learning_rate": 0.0026275864033555975, "loss": 1.2314, "step": 3520 }, { "epoch": 0.3087627375727739, "grad_norm": 0.1376953125, "learning_rate": 0.0026273051965758284, "loss": 1.225, "step": 3521 }, { "epoch": 0.30885042934714846, "grad_norm": 0.1015625, "learning_rate": 0.0026270239006683477, "loss": 1.1915, "step": 3522 }, { "epoch": 0.30893812112152297, "grad_norm": 0.111328125, "learning_rate": 0.002626742515658809, "loss": 1.2008, "step": 3523 }, { "epoch": 0.30902581289589753, "grad_norm": 0.07421875, "learning_rate": 0.0026264610415728746, "loss": 1.2457, "step": 3524 }, { "epoch": 0.3091135046702721, "grad_norm": 0.103515625, "learning_rate": 0.0026261794784362144, "loss": 1.2982, "step": 3525 }, { "epoch": 0.3092011964446466, "grad_norm": 0.06982421875, "learning_rate": 0.002625897826274505, "loss": 1.2267, "step": 3526 }, { "epoch": 0.3092888882190212, "grad_norm": 0.10107421875, "learning_rate": 0.0026256160851134335, "loss": 1.2864, "step": 3527 }, { "epoch": 0.30937657999339574, "grad_norm": 0.06884765625, "learning_rate": 0.0026253342549786935, "loss": 1.2724, "step": 3528 }, { "epoch": 0.30946427176777025, "grad_norm": 0.1025390625, "learning_rate": 0.002625052335895988, "loss": 1.2153, "step": 3529 }, { "epoch": 0.3095519635421448, "grad_norm": 0.06396484375, "learning_rate": 0.0026247703278910253, "loss": 1.2986, "step": 3530 }, { "epoch": 0.3096396553165193, "grad_norm": 0.072265625, "learning_rate": 0.002624488230989526, "loss": 1.25, "step": 3531 }, { "epoch": 0.3097273470908939, "grad_norm": 0.11376953125, "learning_rate": 0.0026242060452172154, "loss": 1.238, "step": 3532 }, { "epoch": 0.30981503886526846, "grad_norm": 0.08251953125, "learning_rate": 0.002623923770599828, "loss": 1.2193, "step": 3533 }, { "epoch": 0.30990273063964296, "grad_norm": 0.107421875, "learning_rate": 0.0026236414071631074, "loss": 1.313, "step": 3534 }, { "epoch": 0.30999042241401753, "grad_norm": 0.0771484375, "learning_rate": 0.0026233589549328035, "loss": 1.1913, "step": 3535 }, { "epoch": 0.3100781141883921, "grad_norm": 0.1240234375, "learning_rate": 0.0026230764139346754, "loss": 1.2406, "step": 3536 }, { "epoch": 0.3101658059627666, "grad_norm": 0.056396484375, "learning_rate": 0.002622793784194491, "loss": 1.2412, "step": 3537 }, { "epoch": 0.31025349773714117, "grad_norm": 0.0673828125, "learning_rate": 0.002622511065738024, "loss": 1.2744, "step": 3538 }, { "epoch": 0.31034118951151574, "grad_norm": 0.0634765625, "learning_rate": 0.002622228258591059, "loss": 1.2583, "step": 3539 }, { "epoch": 0.31042888128589025, "grad_norm": 0.06982421875, "learning_rate": 0.0026219453627793865, "loss": 1.2519, "step": 3540 }, { "epoch": 0.3105165730602648, "grad_norm": 0.10986328125, "learning_rate": 0.002621662378328806, "loss": 1.2772, "step": 3541 }, { "epoch": 0.3106042648346393, "grad_norm": 0.0634765625, "learning_rate": 0.002621379305265125, "loss": 1.2914, "step": 3542 }, { "epoch": 0.3106919566090139, "grad_norm": 0.07666015625, "learning_rate": 0.0026210961436141586, "loss": 1.2564, "step": 3543 }, { "epoch": 0.31077964838338845, "grad_norm": 0.09814453125, "learning_rate": 0.002620812893401731, "loss": 1.2265, "step": 3544 }, { "epoch": 0.31086734015776296, "grad_norm": 0.0634765625, "learning_rate": 0.002620529554653674, "loss": 1.2415, "step": 3545 }, { "epoch": 0.3109550319321375, "grad_norm": 0.126953125, "learning_rate": 0.002620246127395826, "loss": 1.3464, "step": 3546 }, { "epoch": 0.3110427237065121, "grad_norm": 0.07763671875, "learning_rate": 0.002619962611654037, "loss": 1.1727, "step": 3547 }, { "epoch": 0.3111304154808866, "grad_norm": 0.1650390625, "learning_rate": 0.002619679007454161, "loss": 1.245, "step": 3548 }, { "epoch": 0.31121810725526117, "grad_norm": 0.11962890625, "learning_rate": 0.002619395314822063, "loss": 1.2309, "step": 3549 }, { "epoch": 0.31130579902963573, "grad_norm": 0.1083984375, "learning_rate": 0.002619111533783615, "loss": 1.205, "step": 3550 }, { "epoch": 0.31139349080401024, "grad_norm": 0.10498046875, "learning_rate": 0.0026188276643646963, "loss": 1.1988, "step": 3551 }, { "epoch": 0.3114811825783848, "grad_norm": 0.08642578125, "learning_rate": 0.0026185437065911962, "loss": 1.2493, "step": 3552 }, { "epoch": 0.3115688743527593, "grad_norm": 0.07568359375, "learning_rate": 0.0026182596604890102, "loss": 1.186, "step": 3553 }, { "epoch": 0.3116565661271339, "grad_norm": 0.1201171875, "learning_rate": 0.0026179755260840425, "loss": 1.3243, "step": 3554 }, { "epoch": 0.31174425790150845, "grad_norm": 0.1103515625, "learning_rate": 0.002617691303402206, "loss": 1.2698, "step": 3555 }, { "epoch": 0.31183194967588296, "grad_norm": 0.0654296875, "learning_rate": 0.00261740699246942, "loss": 1.2901, "step": 3556 }, { "epoch": 0.3119196414502575, "grad_norm": 0.1416015625, "learning_rate": 0.0026171225933116138, "loss": 1.2743, "step": 3557 }, { "epoch": 0.3120073332246321, "grad_norm": 0.0576171875, "learning_rate": 0.0026168381059547244, "loss": 1.2561, "step": 3558 }, { "epoch": 0.3120950249990066, "grad_norm": 0.134765625, "learning_rate": 0.002616553530424695, "loss": 1.199, "step": 3559 }, { "epoch": 0.31218271677338116, "grad_norm": 0.058837890625, "learning_rate": 0.0026162688667474795, "loss": 1.3206, "step": 3560 }, { "epoch": 0.31227040854775573, "grad_norm": 0.1796875, "learning_rate": 0.0026159841149490373, "loss": 1.2078, "step": 3561 }, { "epoch": 0.31235810032213024, "grad_norm": 0.11279296875, "learning_rate": 0.0026156992750553374, "loss": 1.2163, "step": 3562 }, { "epoch": 0.3124457920965048, "grad_norm": 0.15625, "learning_rate": 0.002615414347092356, "loss": 1.2781, "step": 3563 }, { "epoch": 0.31253348387087937, "grad_norm": 0.12255859375, "learning_rate": 0.002615129331086079, "loss": 1.2727, "step": 3564 }, { "epoch": 0.3126211756452539, "grad_norm": 0.09033203125, "learning_rate": 0.0026148442270624984, "loss": 1.3143, "step": 3565 }, { "epoch": 0.31270886741962844, "grad_norm": 0.09375, "learning_rate": 0.0026145590350476152, "loss": 1.2293, "step": 3566 }, { "epoch": 0.31279655919400295, "grad_norm": 0.08447265625, "learning_rate": 0.0026142737550674377, "loss": 1.2405, "step": 3567 }, { "epoch": 0.3128842509683775, "grad_norm": 0.12353515625, "learning_rate": 0.0026139883871479836, "loss": 1.2473, "step": 3568 }, { "epoch": 0.3129719427427521, "grad_norm": 0.07177734375, "learning_rate": 0.0026137029313152765, "loss": 1.2754, "step": 3569 }, { "epoch": 0.3130596345171266, "grad_norm": 0.10498046875, "learning_rate": 0.0026134173875953497, "loss": 1.2293, "step": 3570 }, { "epoch": 0.31314732629150116, "grad_norm": 0.060302734375, "learning_rate": 0.0026131317560142446, "loss": 1.1679, "step": 3571 }, { "epoch": 0.3132350180658757, "grad_norm": 0.1337890625, "learning_rate": 0.0026128460365980096, "loss": 1.3056, "step": 3572 }, { "epoch": 0.31332270984025024, "grad_norm": 0.06689453125, "learning_rate": 0.0026125602293727017, "loss": 1.2326, "step": 3573 }, { "epoch": 0.3134104016146248, "grad_norm": 0.11181640625, "learning_rate": 0.0026122743343643855, "loss": 1.2297, "step": 3574 }, { "epoch": 0.31349809338899937, "grad_norm": 0.142578125, "learning_rate": 0.0026119883515991348, "loss": 1.1976, "step": 3575 }, { "epoch": 0.3135857851633739, "grad_norm": 0.0703125, "learning_rate": 0.002611702281103029, "loss": 1.1928, "step": 3576 }, { "epoch": 0.31367347693774844, "grad_norm": 0.078125, "learning_rate": 0.0026114161229021577, "loss": 1.2385, "step": 3577 }, { "epoch": 0.31376116871212295, "grad_norm": 0.0625, "learning_rate": 0.0026111298770226184, "loss": 1.2769, "step": 3578 }, { "epoch": 0.3138488604864975, "grad_norm": 0.0654296875, "learning_rate": 0.002610843543490515, "loss": 1.2345, "step": 3579 }, { "epoch": 0.3139365522608721, "grad_norm": 0.06640625, "learning_rate": 0.0026105571223319613, "loss": 1.2379, "step": 3580 }, { "epoch": 0.3140242440352466, "grad_norm": 0.053466796875, "learning_rate": 0.0026102706135730774, "loss": 1.2695, "step": 3581 }, { "epoch": 0.31411193580962116, "grad_norm": 0.08203125, "learning_rate": 0.0026099840172399925, "loss": 1.2387, "step": 3582 }, { "epoch": 0.3141996275839957, "grad_norm": 0.06494140625, "learning_rate": 0.0026096973333588434, "loss": 1.284, "step": 3583 }, { "epoch": 0.31428731935837023, "grad_norm": 0.07861328125, "learning_rate": 0.0026094105619557753, "loss": 1.1646, "step": 3584 }, { "epoch": 0.3143750111327448, "grad_norm": 0.0732421875, "learning_rate": 0.0026091237030569404, "loss": 1.2355, "step": 3585 }, { "epoch": 0.31446270290711936, "grad_norm": 0.07275390625, "learning_rate": 0.0026088367566885, "loss": 1.272, "step": 3586 }, { "epoch": 0.3145503946814939, "grad_norm": 0.0751953125, "learning_rate": 0.0026085497228766218, "loss": 1.3109, "step": 3587 }, { "epoch": 0.31463808645586844, "grad_norm": 0.07373046875, "learning_rate": 0.002608262601647484, "loss": 1.2305, "step": 3588 }, { "epoch": 0.31472577823024295, "grad_norm": 0.201171875, "learning_rate": 0.0026079753930272707, "loss": 1.2498, "step": 3589 }, { "epoch": 0.3148134700046175, "grad_norm": 0.076171875, "learning_rate": 0.002607688097042174, "loss": 1.2388, "step": 3590 }, { "epoch": 0.3149011617789921, "grad_norm": 0.2412109375, "learning_rate": 0.002607400713718396, "loss": 1.2758, "step": 3591 }, { "epoch": 0.3149888535533666, "grad_norm": 0.125, "learning_rate": 0.002607113243082144, "loss": 1.2962, "step": 3592 }, { "epoch": 0.31507654532774115, "grad_norm": 0.248046875, "learning_rate": 0.0026068256851596344, "loss": 1.2127, "step": 3593 }, { "epoch": 0.3151642371021157, "grad_norm": 0.134765625, "learning_rate": 0.0026065380399770935, "loss": 1.2487, "step": 3594 }, { "epoch": 0.31525192887649023, "grad_norm": 0.3515625, "learning_rate": 0.0026062503075607517, "loss": 1.3183, "step": 3595 }, { "epoch": 0.3153396206508648, "grad_norm": 0.1025390625, "learning_rate": 0.002605962487936851, "loss": 1.2079, "step": 3596 }, { "epoch": 0.31542731242523936, "grad_norm": 0.28515625, "learning_rate": 0.0026056745811316386, "loss": 1.229, "step": 3597 }, { "epoch": 0.31551500419961387, "grad_norm": 0.16015625, "learning_rate": 0.002605386587171372, "loss": 1.2323, "step": 3598 }, { "epoch": 0.31560269597398843, "grad_norm": 0.1552734375, "learning_rate": 0.0026050985060823146, "loss": 1.2513, "step": 3599 }, { "epoch": 0.315690387748363, "grad_norm": 0.1787109375, "learning_rate": 0.002604810337890739, "loss": 1.2235, "step": 3600 }, { "epoch": 0.3157780795227375, "grad_norm": 0.07177734375, "learning_rate": 0.002604522082622926, "loss": 1.2742, "step": 3601 }, { "epoch": 0.3158657712971121, "grad_norm": 0.1357421875, "learning_rate": 0.0026042337403051627, "loss": 1.2944, "step": 3602 }, { "epoch": 0.3159534630714866, "grad_norm": 0.09130859375, "learning_rate": 0.002603945310963746, "loss": 1.2576, "step": 3603 }, { "epoch": 0.31604115484586115, "grad_norm": 0.08837890625, "learning_rate": 0.002603656794624979, "loss": 1.2208, "step": 3604 }, { "epoch": 0.3161288466202357, "grad_norm": 0.08837890625, "learning_rate": 0.0026033681913151746, "loss": 1.2679, "step": 3605 }, { "epoch": 0.3162165383946102, "grad_norm": 0.07568359375, "learning_rate": 0.002603079501060652, "loss": 1.2825, "step": 3606 }, { "epoch": 0.3163042301689848, "grad_norm": 0.09423828125, "learning_rate": 0.0026027907238877395, "loss": 1.2243, "step": 3607 }, { "epoch": 0.31639192194335936, "grad_norm": 0.06884765625, "learning_rate": 0.002602501859822772, "loss": 1.2512, "step": 3608 }, { "epoch": 0.31647961371773387, "grad_norm": 0.059814453125, "learning_rate": 0.0026022129088920944, "loss": 1.1959, "step": 3609 }, { "epoch": 0.31656730549210843, "grad_norm": 0.06640625, "learning_rate": 0.002601923871122057, "loss": 1.207, "step": 3610 }, { "epoch": 0.316654997266483, "grad_norm": 0.064453125, "learning_rate": 0.0026016347465390205, "loss": 1.2474, "step": 3611 }, { "epoch": 0.3167426890408575, "grad_norm": 0.0751953125, "learning_rate": 0.0026013455351693517, "loss": 1.2829, "step": 3612 }, { "epoch": 0.31683038081523207, "grad_norm": 0.1181640625, "learning_rate": 0.002601056237039426, "loss": 1.238, "step": 3613 }, { "epoch": 0.3169180725896066, "grad_norm": 0.09033203125, "learning_rate": 0.0026007668521756265, "loss": 1.2483, "step": 3614 }, { "epoch": 0.31700576436398115, "grad_norm": 0.09716796875, "learning_rate": 0.0026004773806043444, "loss": 1.3106, "step": 3615 }, { "epoch": 0.3170934561383557, "grad_norm": 0.10302734375, "learning_rate": 0.0026001878223519794, "loss": 1.2445, "step": 3616 }, { "epoch": 0.3171811479127302, "grad_norm": 0.07275390625, "learning_rate": 0.0025998981774449374, "loss": 1.2775, "step": 3617 }, { "epoch": 0.3172688396871048, "grad_norm": 0.0927734375, "learning_rate": 0.0025996084459096336, "loss": 1.2309, "step": 3618 }, { "epoch": 0.31735653146147935, "grad_norm": 0.0751953125, "learning_rate": 0.002599318627772491, "loss": 1.2376, "step": 3619 }, { "epoch": 0.31744422323585386, "grad_norm": 0.078125, "learning_rate": 0.0025990287230599402, "loss": 1.2988, "step": 3620 }, { "epoch": 0.3175319150102284, "grad_norm": 0.057861328125, "learning_rate": 0.0025987387317984198, "loss": 1.2367, "step": 3621 }, { "epoch": 0.317619606784603, "grad_norm": 0.10888671875, "learning_rate": 0.0025984486540143767, "loss": 1.2341, "step": 3622 }, { "epoch": 0.3177072985589775, "grad_norm": 0.0595703125, "learning_rate": 0.002598158489734264, "loss": 1.2686, "step": 3623 }, { "epoch": 0.31779499033335207, "grad_norm": 0.0732421875, "learning_rate": 0.0025978682389845454, "loss": 1.2781, "step": 3624 }, { "epoch": 0.3178826821077266, "grad_norm": 0.05615234375, "learning_rate": 0.0025975779017916897, "loss": 1.2292, "step": 3625 }, { "epoch": 0.31797037388210114, "grad_norm": 0.052978515625, "learning_rate": 0.002597287478182176, "loss": 1.2101, "step": 3626 }, { "epoch": 0.3180580656564757, "grad_norm": 0.0546875, "learning_rate": 0.002596996968182489, "loss": 1.2497, "step": 3627 }, { "epoch": 0.3181457574308502, "grad_norm": 0.0576171875, "learning_rate": 0.0025967063718191244, "loss": 1.2101, "step": 3628 }, { "epoch": 0.3182334492052248, "grad_norm": 0.056396484375, "learning_rate": 0.0025964156891185816, "loss": 1.1936, "step": 3629 }, { "epoch": 0.31832114097959935, "grad_norm": 0.06884765625, "learning_rate": 0.002596124920107371, "loss": 1.2458, "step": 3630 }, { "epoch": 0.31840883275397386, "grad_norm": 0.083984375, "learning_rate": 0.0025958340648120103, "loss": 1.2493, "step": 3631 }, { "epoch": 0.3184965245283484, "grad_norm": 0.061279296875, "learning_rate": 0.002595543123259025, "loss": 1.2926, "step": 3632 }, { "epoch": 0.318584216302723, "grad_norm": 0.10986328125, "learning_rate": 0.0025952520954749475, "loss": 1.2266, "step": 3633 }, { "epoch": 0.3186719080770975, "grad_norm": 0.06982421875, "learning_rate": 0.002594960981486319, "loss": 1.2825, "step": 3634 }, { "epoch": 0.31875959985147206, "grad_norm": 0.177734375, "learning_rate": 0.002594669781319688, "loss": 1.2928, "step": 3635 }, { "epoch": 0.31884729162584663, "grad_norm": 0.09521484375, "learning_rate": 0.0025943784950016122, "loss": 1.2842, "step": 3636 }, { "epoch": 0.31893498340022114, "grad_norm": 0.07373046875, "learning_rate": 0.0025940871225586556, "loss": 1.227, "step": 3637 }, { "epoch": 0.3190226751745957, "grad_norm": 0.06396484375, "learning_rate": 0.00259379566401739, "loss": 1.3326, "step": 3638 }, { "epoch": 0.3191103669489702, "grad_norm": 0.05810546875, "learning_rate": 0.002593504119404397, "loss": 1.19, "step": 3639 }, { "epoch": 0.3191980587233448, "grad_norm": 0.06689453125, "learning_rate": 0.002593212488746263, "loss": 1.253, "step": 3640 }, { "epoch": 0.31928575049771935, "grad_norm": 0.1376953125, "learning_rate": 0.002592920772069586, "loss": 1.2451, "step": 3641 }, { "epoch": 0.31937344227209385, "grad_norm": 0.0859375, "learning_rate": 0.0025926289694009675, "loss": 1.1906, "step": 3642 }, { "epoch": 0.3194611340464684, "grad_norm": 0.12890625, "learning_rate": 0.0025923370807670217, "loss": 1.2645, "step": 3643 }, { "epoch": 0.319548825820843, "grad_norm": 0.1259765625, "learning_rate": 0.002592045106194366, "loss": 1.2032, "step": 3644 }, { "epoch": 0.3196365175952175, "grad_norm": 0.08740234375, "learning_rate": 0.0025917530457096284, "loss": 1.2641, "step": 3645 }, { "epoch": 0.31972420936959206, "grad_norm": 0.09326171875, "learning_rate": 0.0025914608993394444, "loss": 1.2134, "step": 3646 }, { "epoch": 0.3198119011439666, "grad_norm": 0.059814453125, "learning_rate": 0.0025911686671104563, "loss": 1.1948, "step": 3647 }, { "epoch": 0.31989959291834114, "grad_norm": 0.0634765625, "learning_rate": 0.002590876349049316, "loss": 1.2801, "step": 3648 }, { "epoch": 0.3199872846927157, "grad_norm": 0.10107421875, "learning_rate": 0.0025905839451826817, "loss": 1.2959, "step": 3649 }, { "epoch": 0.3200749764670902, "grad_norm": 0.07373046875, "learning_rate": 0.0025902914555372193, "loss": 1.2742, "step": 3650 }, { "epoch": 0.3201626682414648, "grad_norm": 0.078125, "learning_rate": 0.002589998880139603, "loss": 1.2424, "step": 3651 }, { "epoch": 0.32025036001583934, "grad_norm": 0.057861328125, "learning_rate": 0.002589706219016516, "loss": 1.3684, "step": 3652 }, { "epoch": 0.32033805179021385, "grad_norm": 0.07275390625, "learning_rate": 0.002589413472194648, "loss": 1.1947, "step": 3653 }, { "epoch": 0.3204257435645884, "grad_norm": 0.06591796875, "learning_rate": 0.002589120639700696, "loss": 1.2118, "step": 3654 }, { "epoch": 0.320513435338963, "grad_norm": 0.06103515625, "learning_rate": 0.002588827721561366, "loss": 1.2302, "step": 3655 }, { "epoch": 0.3206011271133375, "grad_norm": 0.07275390625, "learning_rate": 0.0025885347178033714, "loss": 1.28, "step": 3656 }, { "epoch": 0.32068881888771206, "grad_norm": 0.10595703125, "learning_rate": 0.002588241628453433, "loss": 1.247, "step": 3657 }, { "epoch": 0.3207765106620866, "grad_norm": 0.138671875, "learning_rate": 0.0025879484535382804, "loss": 1.2522, "step": 3658 }, { "epoch": 0.32086420243646113, "grad_norm": 0.111328125, "learning_rate": 0.0025876551930846506, "loss": 1.3613, "step": 3659 }, { "epoch": 0.3209518942108357, "grad_norm": 0.2236328125, "learning_rate": 0.0025873618471192873, "loss": 1.2587, "step": 3660 }, { "epoch": 0.3210395859852102, "grad_norm": 0.07568359375, "learning_rate": 0.002587068415668943, "loss": 1.27, "step": 3661 }, { "epoch": 0.3211272777595848, "grad_norm": 0.1396484375, "learning_rate": 0.0025867748987603786, "loss": 1.2276, "step": 3662 }, { "epoch": 0.32121496953395934, "grad_norm": 0.0869140625, "learning_rate": 0.0025864812964203616, "loss": 1.2125, "step": 3663 }, { "epoch": 0.32130266130833385, "grad_norm": 0.140625, "learning_rate": 0.0025861876086756682, "loss": 1.2489, "step": 3664 }, { "epoch": 0.3213903530827084, "grad_norm": 0.1123046875, "learning_rate": 0.0025858938355530817, "loss": 1.255, "step": 3665 }, { "epoch": 0.321478044857083, "grad_norm": 0.1455078125, "learning_rate": 0.002585599977079393, "loss": 1.2223, "step": 3666 }, { "epoch": 0.3215657366314575, "grad_norm": 0.11669921875, "learning_rate": 0.002585306033281402, "loss": 1.1807, "step": 3667 }, { "epoch": 0.32165342840583205, "grad_norm": 0.09814453125, "learning_rate": 0.002585012004185915, "loss": 1.225, "step": 3668 }, { "epoch": 0.3217411201802066, "grad_norm": 0.08642578125, "learning_rate": 0.002584717889819747, "loss": 1.3072, "step": 3669 }, { "epoch": 0.32182881195458113, "grad_norm": 0.1484375, "learning_rate": 0.0025844236902097203, "loss": 1.3573, "step": 3670 }, { "epoch": 0.3219165037289557, "grad_norm": 0.0693359375, "learning_rate": 0.002584129405382665, "loss": 1.2401, "step": 3671 }, { "epoch": 0.32200419550333026, "grad_norm": 0.10546875, "learning_rate": 0.0025838350353654194, "loss": 1.2768, "step": 3672 }, { "epoch": 0.32209188727770477, "grad_norm": 0.0673828125, "learning_rate": 0.00258354058018483, "loss": 1.1455, "step": 3673 }, { "epoch": 0.32217957905207933, "grad_norm": 0.053466796875, "learning_rate": 0.0025832460398677493, "loss": 1.2177, "step": 3674 }, { "epoch": 0.32226727082645384, "grad_norm": 0.0634765625, "learning_rate": 0.0025829514144410383, "loss": 1.2181, "step": 3675 }, { "epoch": 0.3223549626008284, "grad_norm": 0.051025390625, "learning_rate": 0.002582656703931567, "loss": 1.2193, "step": 3676 }, { "epoch": 0.322442654375203, "grad_norm": 0.087890625, "learning_rate": 0.002582361908366212, "loss": 1.2433, "step": 3677 }, { "epoch": 0.3225303461495775, "grad_norm": 0.1220703125, "learning_rate": 0.002582067027771858, "loss": 1.2809, "step": 3678 }, { "epoch": 0.32261803792395205, "grad_norm": 0.059814453125, "learning_rate": 0.002581772062175397, "loss": 1.221, "step": 3679 }, { "epoch": 0.3227057296983266, "grad_norm": 0.1025390625, "learning_rate": 0.00258147701160373, "loss": 1.2477, "step": 3680 }, { "epoch": 0.3227934214727011, "grad_norm": 0.061279296875, "learning_rate": 0.0025811818760837634, "loss": 1.2647, "step": 3681 }, { "epoch": 0.3228811132470757, "grad_norm": 0.08544921875, "learning_rate": 0.0025808866556424137, "loss": 1.2774, "step": 3682 }, { "epoch": 0.32296880502145026, "grad_norm": 0.0634765625, "learning_rate": 0.002580591350306604, "loss": 1.3293, "step": 3683 }, { "epoch": 0.32305649679582477, "grad_norm": 0.062255859375, "learning_rate": 0.0025802959601032654, "loss": 1.2613, "step": 3684 }, { "epoch": 0.32314418857019933, "grad_norm": 0.061767578125, "learning_rate": 0.002580000485059337, "loss": 1.23, "step": 3685 }, { "epoch": 0.32323188034457384, "grad_norm": 0.07373046875, "learning_rate": 0.002579704925201765, "loss": 1.2459, "step": 3686 }, { "epoch": 0.3233195721189484, "grad_norm": 0.1064453125, "learning_rate": 0.0025794092805575043, "loss": 1.3314, "step": 3687 }, { "epoch": 0.32340726389332297, "grad_norm": 0.06884765625, "learning_rate": 0.0025791135511535153, "loss": 1.2347, "step": 3688 }, { "epoch": 0.3234949556676975, "grad_norm": 0.189453125, "learning_rate": 0.0025788177370167703, "loss": 1.206, "step": 3689 }, { "epoch": 0.32358264744207205, "grad_norm": 0.08544921875, "learning_rate": 0.0025785218381742447, "loss": 1.2365, "step": 3690 }, { "epoch": 0.3236703392164466, "grad_norm": 0.1884765625, "learning_rate": 0.0025782258546529243, "loss": 1.2302, "step": 3691 }, { "epoch": 0.3237580309908211, "grad_norm": 0.059814453125, "learning_rate": 0.002577929786479803, "loss": 1.249, "step": 3692 }, { "epoch": 0.3238457227651957, "grad_norm": 0.205078125, "learning_rate": 0.0025776336336818797, "loss": 1.2686, "step": 3693 }, { "epoch": 0.32393341453957025, "grad_norm": 0.05810546875, "learning_rate": 0.002577337396286164, "loss": 1.2722, "step": 3694 }, { "epoch": 0.32402110631394476, "grad_norm": 0.201171875, "learning_rate": 0.0025770410743196715, "loss": 1.2291, "step": 3695 }, { "epoch": 0.3241087980883193, "grad_norm": 0.0908203125, "learning_rate": 0.0025767446678094264, "loss": 1.2383, "step": 3696 }, { "epoch": 0.32419648986269384, "grad_norm": 0.185546875, "learning_rate": 0.0025764481767824596, "loss": 1.2263, "step": 3697 }, { "epoch": 0.3242841816370684, "grad_norm": 0.055908203125, "learning_rate": 0.002576151601265811, "loss": 1.2741, "step": 3698 }, { "epoch": 0.32437187341144297, "grad_norm": 0.23828125, "learning_rate": 0.0025758549412865277, "loss": 1.3006, "step": 3699 }, { "epoch": 0.3244595651858175, "grad_norm": 0.06884765625, "learning_rate": 0.0025755581968716634, "loss": 1.3044, "step": 3700 }, { "epoch": 0.32454725696019204, "grad_norm": 0.19921875, "learning_rate": 0.002575261368048281, "loss": 1.2608, "step": 3701 }, { "epoch": 0.3246349487345666, "grad_norm": 0.1025390625, "learning_rate": 0.0025749644548434497, "loss": 1.2218, "step": 3702 }, { "epoch": 0.3247226405089411, "grad_norm": 0.1416015625, "learning_rate": 0.0025746674572842483, "loss": 1.2413, "step": 3703 }, { "epoch": 0.3248103322833157, "grad_norm": 0.0947265625, "learning_rate": 0.002574370375397762, "loss": 1.2187, "step": 3704 }, { "epoch": 0.32489802405769025, "grad_norm": 0.1328125, "learning_rate": 0.0025740732092110834, "loss": 1.2297, "step": 3705 }, { "epoch": 0.32498571583206476, "grad_norm": 0.08740234375, "learning_rate": 0.0025737759587513144, "loss": 1.2439, "step": 3706 }, { "epoch": 0.3250734076064393, "grad_norm": 0.09033203125, "learning_rate": 0.002573478624045562, "loss": 1.2586, "step": 3707 }, { "epoch": 0.3251610993808139, "grad_norm": 0.07763671875, "learning_rate": 0.002573181205120943, "loss": 1.2364, "step": 3708 }, { "epoch": 0.3252487911551884, "grad_norm": 0.055908203125, "learning_rate": 0.0025728837020045814, "loss": 1.2476, "step": 3709 }, { "epoch": 0.32533648292956296, "grad_norm": 0.072265625, "learning_rate": 0.0025725861147236086, "loss": 1.3058, "step": 3710 }, { "epoch": 0.3254241747039375, "grad_norm": 0.06005859375, "learning_rate": 0.0025722884433051637, "loss": 1.2281, "step": 3711 }, { "epoch": 0.32551186647831204, "grad_norm": 0.0546875, "learning_rate": 0.002571990687776394, "loss": 1.2754, "step": 3712 }, { "epoch": 0.3255995582526866, "grad_norm": 0.0537109375, "learning_rate": 0.002571692848164453, "loss": 1.2339, "step": 3713 }, { "epoch": 0.3256872500270611, "grad_norm": 0.064453125, "learning_rate": 0.002571394924496504, "loss": 1.2448, "step": 3714 }, { "epoch": 0.3257749418014357, "grad_norm": 0.059326171875, "learning_rate": 0.002571096916799716, "loss": 1.2943, "step": 3715 }, { "epoch": 0.32586263357581025, "grad_norm": 0.055908203125, "learning_rate": 0.002570798825101268, "loss": 1.2451, "step": 3716 }, { "epoch": 0.32595032535018476, "grad_norm": 0.057861328125, "learning_rate": 0.002570500649428344, "loss": 1.1782, "step": 3717 }, { "epoch": 0.3260380171245593, "grad_norm": 0.0693359375, "learning_rate": 0.002570202389808136, "loss": 1.2579, "step": 3718 }, { "epoch": 0.3261257088989339, "grad_norm": 0.0673828125, "learning_rate": 0.0025699040462678464, "loss": 1.2854, "step": 3719 }, { "epoch": 0.3262134006733084, "grad_norm": 0.1376953125, "learning_rate": 0.002569605618834682, "loss": 1.2283, "step": 3720 }, { "epoch": 0.32630109244768296, "grad_norm": 0.06298828125, "learning_rate": 0.0025693071075358594, "loss": 1.2415, "step": 3721 }, { "epoch": 0.32638878422205747, "grad_norm": 0.1572265625, "learning_rate": 0.002569008512398602, "loss": 1.2265, "step": 3722 }, { "epoch": 0.32647647599643204, "grad_norm": 0.08544921875, "learning_rate": 0.0025687098334501402, "loss": 1.2853, "step": 3723 }, { "epoch": 0.3265641677708066, "grad_norm": 0.15234375, "learning_rate": 0.002568411070717714, "loss": 1.1371, "step": 3724 }, { "epoch": 0.3266518595451811, "grad_norm": 0.1875, "learning_rate": 0.002568112224228568, "loss": 1.2336, "step": 3725 }, { "epoch": 0.3267395513195557, "grad_norm": 0.2197265625, "learning_rate": 0.002567813294009958, "loss": 1.2667, "step": 3726 }, { "epoch": 0.32682724309393024, "grad_norm": 0.1591796875, "learning_rate": 0.002567514280089144, "loss": 1.2905, "step": 3727 }, { "epoch": 0.32691493486830475, "grad_norm": 0.15234375, "learning_rate": 0.0025672151824933974, "loss": 1.2276, "step": 3728 }, { "epoch": 0.3270026266426793, "grad_norm": 0.11376953125, "learning_rate": 0.002566916001249993, "loss": 1.2252, "step": 3729 }, { "epoch": 0.3270903184170539, "grad_norm": 0.142578125, "learning_rate": 0.002566616736386217, "loss": 1.2228, "step": 3730 }, { "epoch": 0.3271780101914284, "grad_norm": 0.08154296875, "learning_rate": 0.0025663173879293603, "loss": 1.2444, "step": 3731 }, { "epoch": 0.32726570196580296, "grad_norm": 0.115234375, "learning_rate": 0.002566017955906724, "loss": 1.1773, "step": 3732 }, { "epoch": 0.32735339374017747, "grad_norm": 0.09228515625, "learning_rate": 0.002565718440345614, "loss": 1.3635, "step": 3733 }, { "epoch": 0.32744108551455203, "grad_norm": 0.0703125, "learning_rate": 0.002565418841273347, "loss": 1.2341, "step": 3734 }, { "epoch": 0.3275287772889266, "grad_norm": 0.0751953125, "learning_rate": 0.002565119158717244, "loss": 1.3048, "step": 3735 }, { "epoch": 0.3276164690633011, "grad_norm": 0.08251953125, "learning_rate": 0.002564819392704636, "loss": 1.2315, "step": 3736 }, { "epoch": 0.3277041608376757, "grad_norm": 0.06689453125, "learning_rate": 0.0025645195432628617, "loss": 1.2537, "step": 3737 }, { "epoch": 0.32779185261205024, "grad_norm": 0.0771484375, "learning_rate": 0.0025642196104192654, "loss": 1.2474, "step": 3738 }, { "epoch": 0.32787954438642475, "grad_norm": 0.057373046875, "learning_rate": 0.002563919594201201, "loss": 1.1895, "step": 3739 }, { "epoch": 0.3279672361607993, "grad_norm": 0.061279296875, "learning_rate": 0.002563619494636029, "loss": 1.2408, "step": 3740 }, { "epoch": 0.3280549279351739, "grad_norm": 0.0673828125, "learning_rate": 0.002563319311751117, "loss": 1.2274, "step": 3741 }, { "epoch": 0.3281426197095484, "grad_norm": 0.062255859375, "learning_rate": 0.0025630190455738417, "loss": 1.1931, "step": 3742 }, { "epoch": 0.32823031148392295, "grad_norm": 0.0595703125, "learning_rate": 0.0025627186961315867, "loss": 1.261, "step": 3743 }, { "epoch": 0.32831800325829746, "grad_norm": 0.06640625, "learning_rate": 0.0025624182634517425, "loss": 1.1855, "step": 3744 }, { "epoch": 0.32840569503267203, "grad_norm": 0.07666015625, "learning_rate": 0.0025621177475617083, "loss": 1.2555, "step": 3745 }, { "epoch": 0.3284933868070466, "grad_norm": 0.1171875, "learning_rate": 0.00256181714848889, "loss": 1.2875, "step": 3746 }, { "epoch": 0.3285810785814211, "grad_norm": 0.06689453125, "learning_rate": 0.0025615164662607013, "loss": 1.228, "step": 3747 }, { "epoch": 0.32866877035579567, "grad_norm": 0.1123046875, "learning_rate": 0.0025612157009045644, "loss": 1.2088, "step": 3748 }, { "epoch": 0.32875646213017024, "grad_norm": 0.0791015625, "learning_rate": 0.0025609148524479075, "loss": 1.1838, "step": 3749 }, { "epoch": 0.32884415390454474, "grad_norm": 0.07861328125, "learning_rate": 0.002560613920918168, "loss": 1.2646, "step": 3750 }, { "epoch": 0.3289318456789193, "grad_norm": 0.1044921875, "learning_rate": 0.0025603129063427894, "loss": 1.2127, "step": 3751 }, { "epoch": 0.3290195374532939, "grad_norm": 0.181640625, "learning_rate": 0.0025600118087492235, "loss": 1.2264, "step": 3752 }, { "epoch": 0.3291072292276684, "grad_norm": 0.08837890625, "learning_rate": 0.00255971062816493, "loss": 1.2345, "step": 3753 }, { "epoch": 0.32919492100204295, "grad_norm": 0.1220703125, "learning_rate": 0.0025594093646173757, "loss": 1.2361, "step": 3754 }, { "epoch": 0.3292826127764175, "grad_norm": 0.125, "learning_rate": 0.0025591080181340355, "loss": 1.2337, "step": 3755 }, { "epoch": 0.329370304550792, "grad_norm": 0.12109375, "learning_rate": 0.0025588065887423898, "loss": 1.2611, "step": 3756 }, { "epoch": 0.3294579963251666, "grad_norm": 0.1044921875, "learning_rate": 0.00255850507646993, "loss": 1.228, "step": 3757 }, { "epoch": 0.3295456880995411, "grad_norm": 0.06787109375, "learning_rate": 0.0025582034813441522, "loss": 1.2299, "step": 3758 }, { "epoch": 0.32963337987391567, "grad_norm": 0.087890625, "learning_rate": 0.0025579018033925617, "loss": 1.2318, "step": 3759 }, { "epoch": 0.32972107164829023, "grad_norm": 0.087890625, "learning_rate": 0.0025576000426426704, "loss": 1.2341, "step": 3760 }, { "epoch": 0.32980876342266474, "grad_norm": 0.06005859375, "learning_rate": 0.002557298199121998, "loss": 1.3144, "step": 3761 }, { "epoch": 0.3298964551970393, "grad_norm": 0.07568359375, "learning_rate": 0.002556996272858072, "loss": 1.2491, "step": 3762 }, { "epoch": 0.32998414697141387, "grad_norm": 0.06298828125, "learning_rate": 0.0025566942638784273, "loss": 1.3375, "step": 3763 }, { "epoch": 0.3300718387457884, "grad_norm": 0.08984375, "learning_rate": 0.0025563921722106067, "loss": 1.2122, "step": 3764 }, { "epoch": 0.33015953052016295, "grad_norm": 0.06201171875, "learning_rate": 0.0025560899978821594, "loss": 1.2835, "step": 3765 }, { "epoch": 0.3302472222945375, "grad_norm": 0.1455078125, "learning_rate": 0.0025557877409206438, "loss": 1.3033, "step": 3766 }, { "epoch": 0.330334914068912, "grad_norm": 0.053955078125, "learning_rate": 0.002555485401353624, "loss": 1.2315, "step": 3767 }, { "epoch": 0.3304226058432866, "grad_norm": 0.1787109375, "learning_rate": 0.002555182979208674, "loss": 1.2515, "step": 3768 }, { "epoch": 0.3305102976176611, "grad_norm": 0.056884765625, "learning_rate": 0.002554880474513372, "loss": 1.2524, "step": 3769 }, { "epoch": 0.33059798939203566, "grad_norm": 0.140625, "learning_rate": 0.0025545778872953073, "loss": 1.2815, "step": 3770 }, { "epoch": 0.33068568116641023, "grad_norm": 0.06787109375, "learning_rate": 0.002554275217582074, "loss": 1.2472, "step": 3771 }, { "epoch": 0.33077337294078474, "grad_norm": 0.1171875, "learning_rate": 0.002553972465401275, "loss": 1.2624, "step": 3772 }, { "epoch": 0.3308610647151593, "grad_norm": 0.099609375, "learning_rate": 0.002553669630780521, "loss": 1.2444, "step": 3773 }, { "epoch": 0.33094875648953387, "grad_norm": 0.10107421875, "learning_rate": 0.00255336671374743, "loss": 1.2145, "step": 3774 }, { "epoch": 0.3310364482639084, "grad_norm": 0.08984375, "learning_rate": 0.002553063714329626, "loss": 1.2447, "step": 3775 }, { "epoch": 0.33112414003828294, "grad_norm": 0.11669921875, "learning_rate": 0.0025527606325547432, "loss": 1.3018, "step": 3776 }, { "epoch": 0.3312118318126575, "grad_norm": 0.08740234375, "learning_rate": 0.002552457468450421, "loss": 1.2316, "step": 3777 }, { "epoch": 0.331299523587032, "grad_norm": 0.1474609375, "learning_rate": 0.002552154222044307, "loss": 1.3027, "step": 3778 }, { "epoch": 0.3313872153614066, "grad_norm": 0.10791015625, "learning_rate": 0.0025518508933640572, "loss": 1.3015, "step": 3779 }, { "epoch": 0.3314749071357811, "grad_norm": 0.123046875, "learning_rate": 0.002551547482437334, "loss": 1.2499, "step": 3780 }, { "epoch": 0.33156259891015566, "grad_norm": 0.06298828125, "learning_rate": 0.0025512439892918072, "loss": 1.1946, "step": 3781 }, { "epoch": 0.3316502906845302, "grad_norm": 0.1337890625, "learning_rate": 0.0025509404139551555, "loss": 1.3158, "step": 3782 }, { "epoch": 0.33173798245890473, "grad_norm": 0.0537109375, "learning_rate": 0.002550636756455064, "loss": 1.2004, "step": 3783 }, { "epoch": 0.3318256742332793, "grad_norm": 0.1103515625, "learning_rate": 0.0025503330168192254, "loss": 1.2695, "step": 3784 }, { "epoch": 0.33191336600765386, "grad_norm": 0.07177734375, "learning_rate": 0.0025500291950753392, "loss": 1.2612, "step": 3785 }, { "epoch": 0.3320010577820284, "grad_norm": 0.06787109375, "learning_rate": 0.0025497252912511145, "loss": 1.212, "step": 3786 }, { "epoch": 0.33208874955640294, "grad_norm": 0.0693359375, "learning_rate": 0.0025494213053742654, "loss": 1.2935, "step": 3787 }, { "epoch": 0.3321764413307775, "grad_norm": 0.1044921875, "learning_rate": 0.0025491172374725157, "loss": 1.2526, "step": 3788 }, { "epoch": 0.332264133105152, "grad_norm": 0.0673828125, "learning_rate": 0.0025488130875735947, "loss": 1.2287, "step": 3789 }, { "epoch": 0.3323518248795266, "grad_norm": 0.1328125, "learning_rate": 0.0025485088557052403, "loss": 1.2283, "step": 3790 }, { "epoch": 0.33243951665390115, "grad_norm": 0.087890625, "learning_rate": 0.0025482045418951976, "loss": 1.2436, "step": 3791 }, { "epoch": 0.33252720842827566, "grad_norm": 0.0927734375, "learning_rate": 0.0025479001461712193, "loss": 1.2762, "step": 3792 }, { "epoch": 0.3326149002026502, "grad_norm": 0.06396484375, "learning_rate": 0.002547595668561066, "loss": 1.2402, "step": 3793 }, { "epoch": 0.33270259197702473, "grad_norm": 0.0712890625, "learning_rate": 0.002547291109092505, "loss": 1.2112, "step": 3794 }, { "epoch": 0.3327902837513993, "grad_norm": 0.07275390625, "learning_rate": 0.002546986467793311, "loss": 1.2288, "step": 3795 }, { "epoch": 0.33287797552577386, "grad_norm": 0.10888671875, "learning_rate": 0.0025466817446912664, "loss": 1.2117, "step": 3796 }, { "epoch": 0.33296566730014837, "grad_norm": 0.07177734375, "learning_rate": 0.0025463769398141613, "loss": 1.2757, "step": 3797 }, { "epoch": 0.33305335907452294, "grad_norm": 0.10009765625, "learning_rate": 0.002546072053189793, "loss": 1.2539, "step": 3798 }, { "epoch": 0.3331410508488975, "grad_norm": 0.1279296875, "learning_rate": 0.002545767084845968, "loss": 1.255, "step": 3799 }, { "epoch": 0.333228742623272, "grad_norm": 0.061767578125, "learning_rate": 0.002545462034810496, "loss": 1.274, "step": 3800 }, { "epoch": 0.3333164343976466, "grad_norm": 0.09912109375, "learning_rate": 0.002545156903111199, "loss": 1.2682, "step": 3801 }, { "epoch": 0.33340412617202114, "grad_norm": 0.0732421875, "learning_rate": 0.002544851689775903, "loss": 1.236, "step": 3802 }, { "epoch": 0.33349181794639565, "grad_norm": 0.11474609375, "learning_rate": 0.0025445463948324423, "loss": 1.237, "step": 3803 }, { "epoch": 0.3335795097207702, "grad_norm": 0.1044921875, "learning_rate": 0.00254424101830866, "loss": 1.2598, "step": 3804 }, { "epoch": 0.3336672014951447, "grad_norm": 0.080078125, "learning_rate": 0.002543935560232406, "loss": 1.2575, "step": 3805 }, { "epoch": 0.3337548932695193, "grad_norm": 0.1494140625, "learning_rate": 0.002543630020631536, "loss": 1.2809, "step": 3806 }, { "epoch": 0.33384258504389386, "grad_norm": 0.055419921875, "learning_rate": 0.0025433243995339147, "loss": 1.2258, "step": 3807 }, { "epoch": 0.33393027681826837, "grad_norm": 0.14453125, "learning_rate": 0.0025430186969674145, "loss": 1.2936, "step": 3808 }, { "epoch": 0.33401796859264293, "grad_norm": 0.06689453125, "learning_rate": 0.0025427129129599146, "loss": 1.261, "step": 3809 }, { "epoch": 0.3341056603670175, "grad_norm": 0.0849609375, "learning_rate": 0.0025424070475393015, "loss": 1.2749, "step": 3810 }, { "epoch": 0.334193352141392, "grad_norm": 0.0927734375, "learning_rate": 0.0025421011007334695, "loss": 1.2608, "step": 3811 }, { "epoch": 0.3342810439157666, "grad_norm": 0.060302734375, "learning_rate": 0.0025417950725703203, "loss": 1.2529, "step": 3812 }, { "epoch": 0.33436873569014114, "grad_norm": 0.10693359375, "learning_rate": 0.0025414889630777622, "loss": 1.2115, "step": 3813 }, { "epoch": 0.33445642746451565, "grad_norm": 0.0673828125, "learning_rate": 0.002541182772283712, "loss": 1.1896, "step": 3814 }, { "epoch": 0.3345441192388902, "grad_norm": 0.1162109375, "learning_rate": 0.0025408765002160934, "loss": 1.1914, "step": 3815 }, { "epoch": 0.3346318110132647, "grad_norm": 0.08447265625, "learning_rate": 0.0025405701469028382, "loss": 1.2526, "step": 3816 }, { "epoch": 0.3347195027876393, "grad_norm": 0.1318359375, "learning_rate": 0.0025402637123718846, "loss": 1.2586, "step": 3817 }, { "epoch": 0.33480719456201385, "grad_norm": 0.068359375, "learning_rate": 0.0025399571966511787, "loss": 1.2677, "step": 3818 }, { "epoch": 0.33489488633638836, "grad_norm": 0.142578125, "learning_rate": 0.0025396505997686736, "loss": 1.2153, "step": 3819 }, { "epoch": 0.33498257811076293, "grad_norm": 0.060791015625, "learning_rate": 0.00253934392175233, "loss": 1.2202, "step": 3820 }, { "epoch": 0.3350702698851375, "grad_norm": 0.1494140625, "learning_rate": 0.0025390371626301173, "loss": 1.2413, "step": 3821 }, { "epoch": 0.335157961659512, "grad_norm": 0.083984375, "learning_rate": 0.00253873032243001, "loss": 1.2733, "step": 3822 }, { "epoch": 0.33524565343388657, "grad_norm": 0.1640625, "learning_rate": 0.002538423401179992, "loss": 1.2891, "step": 3823 }, { "epoch": 0.33533334520826114, "grad_norm": 0.1064453125, "learning_rate": 0.0025381163989080533, "loss": 1.2813, "step": 3824 }, { "epoch": 0.33542103698263565, "grad_norm": 0.1611328125, "learning_rate": 0.002537809315642191, "loss": 1.3156, "step": 3825 }, { "epoch": 0.3355087287570102, "grad_norm": 0.10498046875, "learning_rate": 0.002537502151410411, "loss": 1.2366, "step": 3826 }, { "epoch": 0.3355964205313848, "grad_norm": 0.123046875, "learning_rate": 0.0025371949062407266, "loss": 1.3143, "step": 3827 }, { "epoch": 0.3356841123057593, "grad_norm": 0.0888671875, "learning_rate": 0.002536887580161157, "loss": 1.236, "step": 3828 }, { "epoch": 0.33577180408013385, "grad_norm": 0.11474609375, "learning_rate": 0.002536580173199729, "loss": 1.2422, "step": 3829 }, { "epoch": 0.33585949585450836, "grad_norm": 0.09033203125, "learning_rate": 0.002536272685384478, "loss": 1.2508, "step": 3830 }, { "epoch": 0.3359471876288829, "grad_norm": 0.09423828125, "learning_rate": 0.002535965116743446, "loss": 1.2801, "step": 3831 }, { "epoch": 0.3360348794032575, "grad_norm": 0.0859375, "learning_rate": 0.0025356574673046826, "loss": 1.2149, "step": 3832 }, { "epoch": 0.336122571177632, "grad_norm": 0.0576171875, "learning_rate": 0.002535349737096244, "loss": 1.2393, "step": 3833 }, { "epoch": 0.33621026295200657, "grad_norm": 0.0703125, "learning_rate": 0.0025350419261461962, "loss": 1.205, "step": 3834 }, { "epoch": 0.33629795472638113, "grad_norm": 0.0634765625, "learning_rate": 0.002534734034482608, "loss": 1.2797, "step": 3835 }, { "epoch": 0.33638564650075564, "grad_norm": 0.06787109375, "learning_rate": 0.0025344260621335607, "loss": 1.179, "step": 3836 }, { "epoch": 0.3364733382751302, "grad_norm": 0.06787109375, "learning_rate": 0.0025341180091271393, "loss": 1.2446, "step": 3837 }, { "epoch": 0.3365610300495048, "grad_norm": 0.059814453125, "learning_rate": 0.0025338098754914377, "loss": 1.2094, "step": 3838 }, { "epoch": 0.3366487218238793, "grad_norm": 0.0634765625, "learning_rate": 0.0025335016612545573, "loss": 1.2244, "step": 3839 }, { "epoch": 0.33673641359825385, "grad_norm": 0.06689453125, "learning_rate": 0.002533193366444606, "loss": 1.2693, "step": 3840 }, { "epoch": 0.33682410537262836, "grad_norm": 0.07470703125, "learning_rate": 0.0025328849910896995, "loss": 1.2777, "step": 3841 }, { "epoch": 0.3369117971470029, "grad_norm": 0.056884765625, "learning_rate": 0.002532576535217961, "loss": 1.2065, "step": 3842 }, { "epoch": 0.3369994889213775, "grad_norm": 0.0673828125, "learning_rate": 0.0025322679988575214, "loss": 1.3, "step": 3843 }, { "epoch": 0.337087180695752, "grad_norm": 0.06640625, "learning_rate": 0.0025319593820365177, "loss": 1.1818, "step": 3844 }, { "epoch": 0.33717487247012656, "grad_norm": 0.0615234375, "learning_rate": 0.0025316506847830947, "loss": 1.2678, "step": 3845 }, { "epoch": 0.33726256424450113, "grad_norm": 0.07373046875, "learning_rate": 0.002531341907125405, "loss": 1.2082, "step": 3846 }, { "epoch": 0.33735025601887564, "grad_norm": 0.07177734375, "learning_rate": 0.002531033049091609, "loss": 1.1894, "step": 3847 }, { "epoch": 0.3374379477932502, "grad_norm": 0.07470703125, "learning_rate": 0.002530724110709873, "loss": 1.2238, "step": 3848 }, { "epoch": 0.33752563956762477, "grad_norm": 0.068359375, "learning_rate": 0.0025304150920083717, "loss": 1.2687, "step": 3849 }, { "epoch": 0.3376133313419993, "grad_norm": 0.06103515625, "learning_rate": 0.0025301059930152876, "loss": 1.3019, "step": 3850 }, { "epoch": 0.33770102311637384, "grad_norm": 0.06494140625, "learning_rate": 0.002529796813758808, "loss": 1.2432, "step": 3851 }, { "epoch": 0.33778871489074835, "grad_norm": 0.0673828125, "learning_rate": 0.0025294875542671304, "loss": 1.2113, "step": 3852 }, { "epoch": 0.3378764066651229, "grad_norm": 0.09375, "learning_rate": 0.0025291782145684585, "loss": 1.2333, "step": 3853 }, { "epoch": 0.3379640984394975, "grad_norm": 0.08642578125, "learning_rate": 0.0025288687946910028, "loss": 1.2834, "step": 3854 }, { "epoch": 0.338051790213872, "grad_norm": 0.09326171875, "learning_rate": 0.0025285592946629816, "loss": 1.2635, "step": 3855 }, { "epoch": 0.33813948198824656, "grad_norm": 0.138671875, "learning_rate": 0.002528249714512621, "loss": 1.2457, "step": 3856 }, { "epoch": 0.3382271737626211, "grad_norm": 0.08154296875, "learning_rate": 0.0025279400542681536, "loss": 1.2284, "step": 3857 }, { "epoch": 0.33831486553699563, "grad_norm": 0.1435546875, "learning_rate": 0.00252763031395782, "loss": 1.2732, "step": 3858 }, { "epoch": 0.3384025573113702, "grad_norm": 0.08203125, "learning_rate": 0.002527320493609867, "loss": 1.2643, "step": 3859 }, { "epoch": 0.33849024908574477, "grad_norm": 0.111328125, "learning_rate": 0.002527010593252549, "loss": 1.2151, "step": 3860 }, { "epoch": 0.3385779408601193, "grad_norm": 0.0830078125, "learning_rate": 0.0025267006129141303, "loss": 1.2235, "step": 3861 }, { "epoch": 0.33866563263449384, "grad_norm": 0.12451171875, "learning_rate": 0.0025263905526228783, "loss": 1.206, "step": 3862 }, { "epoch": 0.3387533244088684, "grad_norm": 0.10986328125, "learning_rate": 0.00252608041240707, "loss": 1.2781, "step": 3863 }, { "epoch": 0.3388410161832429, "grad_norm": 0.1669921875, "learning_rate": 0.00252577019229499, "loss": 1.2216, "step": 3864 }, { "epoch": 0.3389287079576175, "grad_norm": 0.11767578125, "learning_rate": 0.0025254598923149293, "loss": 1.2875, "step": 3865 }, { "epoch": 0.339016399731992, "grad_norm": 0.1611328125, "learning_rate": 0.002525149512495186, "loss": 1.2547, "step": 3866 }, { "epoch": 0.33910409150636656, "grad_norm": 0.09130859375, "learning_rate": 0.0025248390528640665, "loss": 1.1919, "step": 3867 }, { "epoch": 0.3391917832807411, "grad_norm": 0.12890625, "learning_rate": 0.002524528513449884, "loss": 1.2613, "step": 3868 }, { "epoch": 0.33927947505511563, "grad_norm": 0.0791015625, "learning_rate": 0.0025242178942809584, "loss": 1.2044, "step": 3869 }, { "epoch": 0.3393671668294902, "grad_norm": 0.07177734375, "learning_rate": 0.002523907195385618, "loss": 1.2271, "step": 3870 }, { "epoch": 0.33945485860386476, "grad_norm": 0.0703125, "learning_rate": 0.002523596416792197, "loss": 1.2427, "step": 3871 }, { "epoch": 0.33954255037823927, "grad_norm": 0.1123046875, "learning_rate": 0.002523285558529038, "loss": 1.2841, "step": 3872 }, { "epoch": 0.33963024215261384, "grad_norm": 0.08935546875, "learning_rate": 0.0025229746206244905, "loss": 1.2728, "step": 3873 }, { "epoch": 0.3397179339269884, "grad_norm": 0.10693359375, "learning_rate": 0.0025226636031069117, "loss": 1.2411, "step": 3874 }, { "epoch": 0.3398056257013629, "grad_norm": 0.166015625, "learning_rate": 0.002522352506004664, "loss": 1.3056, "step": 3875 }, { "epoch": 0.3398933174757375, "grad_norm": 0.0947265625, "learning_rate": 0.0025220413293461203, "loss": 1.2233, "step": 3876 }, { "epoch": 0.339981009250112, "grad_norm": 0.1845703125, "learning_rate": 0.002521730073159659, "loss": 1.2258, "step": 3877 }, { "epoch": 0.34006870102448655, "grad_norm": 0.10888671875, "learning_rate": 0.0025214187374736647, "loss": 1.2153, "step": 3878 }, { "epoch": 0.3401563927988611, "grad_norm": 0.126953125, "learning_rate": 0.002521107322316531, "loss": 1.2681, "step": 3879 }, { "epoch": 0.34024408457323563, "grad_norm": 0.1357421875, "learning_rate": 0.002520795827716659, "loss": 1.2595, "step": 3880 }, { "epoch": 0.3403317763476102, "grad_norm": 0.078125, "learning_rate": 0.002520484253702455, "loss": 1.2576, "step": 3881 }, { "epoch": 0.34041946812198476, "grad_norm": 0.154296875, "learning_rate": 0.0025201726003023337, "loss": 1.183, "step": 3882 }, { "epoch": 0.34050715989635927, "grad_norm": 0.0712890625, "learning_rate": 0.0025198608675447184, "loss": 1.2598, "step": 3883 }, { "epoch": 0.34059485167073383, "grad_norm": 0.1416015625, "learning_rate": 0.002519549055458037, "loss": 1.2995, "step": 3884 }, { "epoch": 0.3406825434451084, "grad_norm": 0.083984375, "learning_rate": 0.0025192371640707267, "loss": 1.2689, "step": 3885 }, { "epoch": 0.3407702352194829, "grad_norm": 0.10498046875, "learning_rate": 0.002518925193411231, "loss": 1.256, "step": 3886 }, { "epoch": 0.3408579269938575, "grad_norm": 0.0654296875, "learning_rate": 0.002518613143508001, "loss": 1.2268, "step": 3887 }, { "epoch": 0.340945618768232, "grad_norm": 0.0859375, "learning_rate": 0.002518301014389494, "loss": 1.214, "step": 3888 }, { "epoch": 0.34103331054260655, "grad_norm": 0.06396484375, "learning_rate": 0.002517988806084176, "loss": 1.2892, "step": 3889 }, { "epoch": 0.3411210023169811, "grad_norm": 0.05810546875, "learning_rate": 0.00251767651862052, "loss": 1.2134, "step": 3890 }, { "epoch": 0.3412086940913556, "grad_norm": 0.06982421875, "learning_rate": 0.0025173641520270054, "loss": 1.233, "step": 3891 }, { "epoch": 0.3412963858657302, "grad_norm": 0.06689453125, "learning_rate": 0.0025170517063321197, "loss": 1.2137, "step": 3892 }, { "epoch": 0.34138407764010475, "grad_norm": 0.07861328125, "learning_rate": 0.0025167391815643556, "loss": 1.2048, "step": 3893 }, { "epoch": 0.34147176941447926, "grad_norm": 0.07275390625, "learning_rate": 0.0025164265777522163, "loss": 1.2007, "step": 3894 }, { "epoch": 0.34155946118885383, "grad_norm": 0.06591796875, "learning_rate": 0.0025161138949242098, "loss": 1.2303, "step": 3895 }, { "epoch": 0.3416471529632284, "grad_norm": 0.1220703125, "learning_rate": 0.0025158011331088514, "loss": 1.2853, "step": 3896 }, { "epoch": 0.3417348447376029, "grad_norm": 0.08740234375, "learning_rate": 0.0025154882923346658, "loss": 1.2677, "step": 3897 }, { "epoch": 0.34182253651197747, "grad_norm": 0.0673828125, "learning_rate": 0.0025151753726301808, "loss": 1.258, "step": 3898 }, { "epoch": 0.34191022828635204, "grad_norm": 0.11669921875, "learning_rate": 0.002514862374023936, "loss": 1.2856, "step": 3899 }, { "epoch": 0.34199792006072655, "grad_norm": 0.056884765625, "learning_rate": 0.0025145492965444756, "loss": 1.3317, "step": 3900 }, { "epoch": 0.3420856118351011, "grad_norm": 0.09326171875, "learning_rate": 0.0025142361402203505, "loss": 1.2366, "step": 3901 }, { "epoch": 0.3421733036094756, "grad_norm": 0.0595703125, "learning_rate": 0.0025139229050801207, "loss": 1.2039, "step": 3902 }, { "epoch": 0.3422609953838502, "grad_norm": 0.068359375, "learning_rate": 0.0025136095911523523, "loss": 1.2236, "step": 3903 }, { "epoch": 0.34234868715822475, "grad_norm": 0.095703125, "learning_rate": 0.002513296198465619, "loss": 1.2212, "step": 3904 }, { "epoch": 0.34243637893259926, "grad_norm": 0.0673828125, "learning_rate": 0.0025129827270485, "loss": 1.2503, "step": 3905 }, { "epoch": 0.3425240707069738, "grad_norm": 0.06982421875, "learning_rate": 0.0025126691769295846, "loss": 1.2586, "step": 3906 }, { "epoch": 0.3426117624813484, "grad_norm": 0.060546875, "learning_rate": 0.002512355548137467, "loss": 1.251, "step": 3907 }, { "epoch": 0.3426994542557229, "grad_norm": 0.05908203125, "learning_rate": 0.00251204184070075, "loss": 1.1944, "step": 3908 }, { "epoch": 0.34278714603009747, "grad_norm": 0.0673828125, "learning_rate": 0.0025117280546480417, "loss": 1.2339, "step": 3909 }, { "epoch": 0.34287483780447203, "grad_norm": 0.064453125, "learning_rate": 0.0025114141900079603, "loss": 1.2355, "step": 3910 }, { "epoch": 0.34296252957884654, "grad_norm": 0.060546875, "learning_rate": 0.002511100246809128, "loss": 1.2453, "step": 3911 }, { "epoch": 0.3430502213532211, "grad_norm": 0.055419921875, "learning_rate": 0.0025107862250801764, "loss": 1.2421, "step": 3912 }, { "epoch": 0.3431379131275956, "grad_norm": 0.059326171875, "learning_rate": 0.002510472124849743, "loss": 1.2565, "step": 3913 }, { "epoch": 0.3432256049019702, "grad_norm": 0.061279296875, "learning_rate": 0.002510157946146473, "loss": 1.2021, "step": 3914 }, { "epoch": 0.34331329667634475, "grad_norm": 0.060791015625, "learning_rate": 0.0025098436889990187, "loss": 1.2609, "step": 3915 }, { "epoch": 0.34340098845071926, "grad_norm": 0.057861328125, "learning_rate": 0.0025095293534360395, "loss": 1.2487, "step": 3916 }, { "epoch": 0.3434886802250938, "grad_norm": 0.06103515625, "learning_rate": 0.0025092149394862024, "loss": 1.2609, "step": 3917 }, { "epoch": 0.3435763719994684, "grad_norm": 0.061279296875, "learning_rate": 0.0025089004471781813, "loss": 1.2354, "step": 3918 }, { "epoch": 0.3436640637738429, "grad_norm": 0.061279296875, "learning_rate": 0.0025085858765406567, "loss": 1.1465, "step": 3919 }, { "epoch": 0.34375175554821746, "grad_norm": 0.056396484375, "learning_rate": 0.002508271227602316, "loss": 1.2679, "step": 3920 }, { "epoch": 0.34383944732259203, "grad_norm": 0.062255859375, "learning_rate": 0.002507956500391855, "loss": 1.2574, "step": 3921 }, { "epoch": 0.34392713909696654, "grad_norm": 0.10205078125, "learning_rate": 0.0025076416949379758, "loss": 1.2663, "step": 3922 }, { "epoch": 0.3440148308713411, "grad_norm": 0.0537109375, "learning_rate": 0.0025073268112693887, "loss": 1.2327, "step": 3923 }, { "epoch": 0.3441025226457156, "grad_norm": 0.0732421875, "learning_rate": 0.002507011849414809, "loss": 1.2299, "step": 3924 }, { "epoch": 0.3441902144200902, "grad_norm": 0.060546875, "learning_rate": 0.002506696809402961, "loss": 1.2659, "step": 3925 }, { "epoch": 0.34427790619446474, "grad_norm": 0.09228515625, "learning_rate": 0.002506381691262576, "loss": 1.2434, "step": 3926 }, { "epoch": 0.34436559796883925, "grad_norm": 0.05419921875, "learning_rate": 0.0025060664950223915, "loss": 1.2113, "step": 3927 }, { "epoch": 0.3444532897432138, "grad_norm": 0.09716796875, "learning_rate": 0.002505751220711152, "loss": 1.1845, "step": 3928 }, { "epoch": 0.3445409815175884, "grad_norm": 0.08251953125, "learning_rate": 0.0025054358683576105, "loss": 1.1872, "step": 3929 }, { "epoch": 0.3446286732919629, "grad_norm": 0.07763671875, "learning_rate": 0.0025051204379905262, "loss": 1.2523, "step": 3930 }, { "epoch": 0.34471636506633746, "grad_norm": 0.10498046875, "learning_rate": 0.0025048049296386658, "loss": 1.2882, "step": 3931 }, { "epoch": 0.344804056840712, "grad_norm": 0.064453125, "learning_rate": 0.0025044893433308017, "loss": 1.1924, "step": 3932 }, { "epoch": 0.34489174861508654, "grad_norm": 0.07958984375, "learning_rate": 0.0025041736790957156, "loss": 1.2367, "step": 3933 }, { "epoch": 0.3449794403894611, "grad_norm": 0.06494140625, "learning_rate": 0.0025038579369621953, "loss": 1.2217, "step": 3934 }, { "epoch": 0.34506713216383567, "grad_norm": 0.107421875, "learning_rate": 0.002503542116959035, "loss": 1.2882, "step": 3935 }, { "epoch": 0.3451548239382102, "grad_norm": 0.0703125, "learning_rate": 0.0025032262191150376, "loss": 1.2185, "step": 3936 }, { "epoch": 0.34524251571258474, "grad_norm": 0.12890625, "learning_rate": 0.0025029102434590113, "loss": 1.2478, "step": 3937 }, { "epoch": 0.34533020748695925, "grad_norm": 0.07958984375, "learning_rate": 0.002502594190019773, "loss": 1.2248, "step": 3938 }, { "epoch": 0.3454178992613338, "grad_norm": 0.16796875, "learning_rate": 0.0025022780588261446, "loss": 1.2349, "step": 3939 }, { "epoch": 0.3455055910357084, "grad_norm": 0.07080078125, "learning_rate": 0.0025019618499069578, "loss": 1.2361, "step": 3940 }, { "epoch": 0.3455932828100829, "grad_norm": 0.1826171875, "learning_rate": 0.00250164556329105, "loss": 1.1849, "step": 3941 }, { "epoch": 0.34568097458445746, "grad_norm": 0.099609375, "learning_rate": 0.0025013291990072647, "loss": 1.217, "step": 3942 }, { "epoch": 0.345768666358832, "grad_norm": 0.1513671875, "learning_rate": 0.0025010127570844545, "loss": 1.2832, "step": 3943 }, { "epoch": 0.34585635813320653, "grad_norm": 0.07666015625, "learning_rate": 0.002500696237551478, "loss": 1.2297, "step": 3944 }, { "epoch": 0.3459440499075811, "grad_norm": 0.0859375, "learning_rate": 0.0025003796404372003, "loss": 1.2372, "step": 3945 }, { "epoch": 0.34603174168195566, "grad_norm": 0.05810546875, "learning_rate": 0.0025000629657704943, "loss": 1.1575, "step": 3946 }, { "epoch": 0.34611943345633017, "grad_norm": 0.12109375, "learning_rate": 0.0024997462135802413, "loss": 1.2751, "step": 3947 }, { "epoch": 0.34620712523070474, "grad_norm": 0.138671875, "learning_rate": 0.002499429383895326, "loss": 1.2396, "step": 3948 }, { "epoch": 0.34629481700507925, "grad_norm": 0.10986328125, "learning_rate": 0.0024991124767446446, "loss": 1.2758, "step": 3949 }, { "epoch": 0.3463825087794538, "grad_norm": 0.1630859375, "learning_rate": 0.0024987954921570966, "loss": 1.2085, "step": 3950 }, { "epoch": 0.3464702005538284, "grad_norm": 0.154296875, "learning_rate": 0.0024984784301615914, "loss": 1.2164, "step": 3951 }, { "epoch": 0.3465578923282029, "grad_norm": 0.1865234375, "learning_rate": 0.0024981612907870434, "loss": 1.2172, "step": 3952 }, { "epoch": 0.34664558410257745, "grad_norm": 0.1328125, "learning_rate": 0.0024978440740623755, "loss": 1.2444, "step": 3953 }, { "epoch": 0.346733275876952, "grad_norm": 0.140625, "learning_rate": 0.002497526780016517, "loss": 1.1621, "step": 3954 }, { "epoch": 0.34682096765132653, "grad_norm": 0.10888671875, "learning_rate": 0.002497209408678403, "loss": 1.2253, "step": 3955 }, { "epoch": 0.3469086594257011, "grad_norm": 0.05615234375, "learning_rate": 0.0024968919600769786, "loss": 1.2584, "step": 3956 }, { "epoch": 0.34699635120007566, "grad_norm": 0.12109375, "learning_rate": 0.0024965744342411943, "loss": 1.3249, "step": 3957 }, { "epoch": 0.34708404297445017, "grad_norm": 0.080078125, "learning_rate": 0.002496256831200006, "loss": 1.2529, "step": 3958 }, { "epoch": 0.34717173474882473, "grad_norm": 0.09375, "learning_rate": 0.0024959391509823798, "loss": 1.2555, "step": 3959 }, { "epoch": 0.34725942652319924, "grad_norm": 0.08984375, "learning_rate": 0.0024956213936172863, "loss": 1.2856, "step": 3960 }, { "epoch": 0.3473471182975738, "grad_norm": 0.10888671875, "learning_rate": 0.002495303559133706, "loss": 1.2621, "step": 3961 }, { "epoch": 0.3474348100719484, "grad_norm": 0.10791015625, "learning_rate": 0.002494985647560622, "loss": 1.2749, "step": 3962 }, { "epoch": 0.3475225018463229, "grad_norm": 0.080078125, "learning_rate": 0.0024946676589270283, "loss": 1.2544, "step": 3963 }, { "epoch": 0.34761019362069745, "grad_norm": 0.12109375, "learning_rate": 0.0024943495932619247, "loss": 1.2811, "step": 3964 }, { "epoch": 0.347697885395072, "grad_norm": 0.06640625, "learning_rate": 0.0024940314505943187, "loss": 1.1936, "step": 3965 }, { "epoch": 0.3477855771694465, "grad_norm": 0.1123046875, "learning_rate": 0.002493713230953222, "loss": 1.288, "step": 3966 }, { "epoch": 0.3478732689438211, "grad_norm": 0.0673828125, "learning_rate": 0.002493394934367658, "loss": 1.2005, "step": 3967 }, { "epoch": 0.34796096071819566, "grad_norm": 0.1455078125, "learning_rate": 0.002493076560866652, "loss": 1.2425, "step": 3968 }, { "epoch": 0.34804865249257017, "grad_norm": 0.09130859375, "learning_rate": 0.0024927581104792406, "loss": 1.233, "step": 3969 }, { "epoch": 0.34813634426694473, "grad_norm": 0.1162109375, "learning_rate": 0.0024924395832344654, "loss": 1.329, "step": 3970 }, { "epoch": 0.34822403604131924, "grad_norm": 0.078125, "learning_rate": 0.0024921209791613744, "loss": 1.2337, "step": 3971 }, { "epoch": 0.3483117278156938, "grad_norm": 0.08935546875, "learning_rate": 0.002491802298289024, "loss": 1.267, "step": 3972 }, { "epoch": 0.34839941959006837, "grad_norm": 0.095703125, "learning_rate": 0.0024914835406464773, "loss": 1.3022, "step": 3973 }, { "epoch": 0.3484871113644429, "grad_norm": 0.1025390625, "learning_rate": 0.002491164706262804, "loss": 1.2107, "step": 3974 }, { "epoch": 0.34857480313881745, "grad_norm": 0.1171875, "learning_rate": 0.002490845795167081, "loss": 1.2571, "step": 3975 }, { "epoch": 0.348662494913192, "grad_norm": 0.1259765625, "learning_rate": 0.0024905268073883925, "loss": 1.3063, "step": 3976 }, { "epoch": 0.3487501866875665, "grad_norm": 0.1142578125, "learning_rate": 0.002490207742955828, "loss": 1.2488, "step": 3977 }, { "epoch": 0.3488378784619411, "grad_norm": 0.09814453125, "learning_rate": 0.0024898886018984868, "loss": 1.267, "step": 3978 }, { "epoch": 0.34892557023631565, "grad_norm": 0.08544921875, "learning_rate": 0.0024895693842454736, "loss": 1.2091, "step": 3979 }, { "epoch": 0.34901326201069016, "grad_norm": 0.1513671875, "learning_rate": 0.002489250090025899, "loss": 1.2057, "step": 3980 }, { "epoch": 0.3491009537850647, "grad_norm": 0.057861328125, "learning_rate": 0.002488930719268884, "loss": 1.2133, "step": 3981 }, { "epoch": 0.3491886455594393, "grad_norm": 0.1376953125, "learning_rate": 0.0024886112720035524, "loss": 1.1726, "step": 3982 }, { "epoch": 0.3492763373338138, "grad_norm": 0.1142578125, "learning_rate": 0.0024882917482590377, "loss": 1.229, "step": 3983 }, { "epoch": 0.34936402910818837, "grad_norm": 0.0908203125, "learning_rate": 0.0024879721480644793, "loss": 1.222, "step": 3984 }, { "epoch": 0.3494517208825629, "grad_norm": 0.1669921875, "learning_rate": 0.0024876524714490247, "loss": 1.2489, "step": 3985 }, { "epoch": 0.34953941265693744, "grad_norm": 0.05908203125, "learning_rate": 0.002487332718441827, "loss": 1.2031, "step": 3986 }, { "epoch": 0.349627104431312, "grad_norm": 0.1591796875, "learning_rate": 0.0024870128890720467, "loss": 1.2162, "step": 3987 }, { "epoch": 0.3497147962056865, "grad_norm": 0.06103515625, "learning_rate": 0.002486692983368852, "loss": 1.2665, "step": 3988 }, { "epoch": 0.3498024879800611, "grad_norm": 0.10498046875, "learning_rate": 0.0024863730013614167, "loss": 1.234, "step": 3989 }, { "epoch": 0.34989017975443565, "grad_norm": 0.08349609375, "learning_rate": 0.0024860529430789226, "loss": 1.2242, "step": 3990 }, { "epoch": 0.34997787152881016, "grad_norm": 0.06640625, "learning_rate": 0.0024857328085505586, "loss": 1.2219, "step": 3991 }, { "epoch": 0.3500655633031847, "grad_norm": 0.08642578125, "learning_rate": 0.00248541259780552, "loss": 1.2252, "step": 3992 }, { "epoch": 0.3501532550775593, "grad_norm": 0.061767578125, "learning_rate": 0.0024850923108730096, "loss": 1.2238, "step": 3993 }, { "epoch": 0.3502409468519338, "grad_norm": 0.0732421875, "learning_rate": 0.0024847719477822353, "loss": 1.3118, "step": 3994 }, { "epoch": 0.35032863862630836, "grad_norm": 0.10400390625, "learning_rate": 0.0024844515085624143, "loss": 1.2506, "step": 3995 }, { "epoch": 0.3504163304006829, "grad_norm": 0.06298828125, "learning_rate": 0.002484130993242771, "loss": 1.2293, "step": 3996 }, { "epoch": 0.35050402217505744, "grad_norm": 0.111328125, "learning_rate": 0.002483810401852534, "loss": 1.1963, "step": 3997 }, { "epoch": 0.350591713949432, "grad_norm": 0.08740234375, "learning_rate": 0.00248348973442094, "loss": 1.2412, "step": 3998 }, { "epoch": 0.3506794057238065, "grad_norm": 0.1591796875, "learning_rate": 0.0024831689909772343, "loss": 1.2672, "step": 3999 }, { "epoch": 0.3507670974981811, "grad_norm": 0.10400390625, "learning_rate": 0.002482848171550668, "loss": 1.2501, "step": 4000 }, { "epoch": 0.3507670974981811, "eval_loss": 1.2495797872543335, "eval_runtime": 429.3686, "eval_samples_per_second": 33.647, "eval_steps_per_second": 8.412, "step": 4000 }, { "epoch": 0.35085478927255564, "grad_norm": 0.076171875, "learning_rate": 0.0024825272761704975, "loss": 1.232, "step": 4001 }, { "epoch": 0.35094248104693015, "grad_norm": 0.078125, "learning_rate": 0.0024822063048659886, "loss": 1.2937, "step": 4002 }, { "epoch": 0.3510301728213047, "grad_norm": 0.06103515625, "learning_rate": 0.0024818852576664135, "loss": 1.2209, "step": 4003 }, { "epoch": 0.3511178645956793, "grad_norm": 0.057861328125, "learning_rate": 0.0024815641346010507, "loss": 1.2123, "step": 4004 }, { "epoch": 0.3512055563700538, "grad_norm": 0.057861328125, "learning_rate": 0.002481242935699185, "loss": 1.2523, "step": 4005 }, { "epoch": 0.35129324814442836, "grad_norm": 0.055419921875, "learning_rate": 0.00248092166099011, "loss": 1.2242, "step": 4006 }, { "epoch": 0.35138093991880287, "grad_norm": 0.08935546875, "learning_rate": 0.0024806003105031236, "loss": 1.3072, "step": 4007 }, { "epoch": 0.35146863169317744, "grad_norm": 0.0888671875, "learning_rate": 0.0024802788842675336, "loss": 1.1508, "step": 4008 }, { "epoch": 0.351556323467552, "grad_norm": 0.1064453125, "learning_rate": 0.0024799573823126525, "loss": 1.2774, "step": 4009 }, { "epoch": 0.3516440152419265, "grad_norm": 0.115234375, "learning_rate": 0.002479635804667801, "loss": 1.21, "step": 4010 }, { "epoch": 0.3517317070163011, "grad_norm": 0.10107421875, "learning_rate": 0.0024793141513623053, "loss": 1.2218, "step": 4011 }, { "epoch": 0.35181939879067564, "grad_norm": 0.06103515625, "learning_rate": 0.0024789924224255006, "loss": 1.2972, "step": 4012 }, { "epoch": 0.35190709056505015, "grad_norm": 0.162109375, "learning_rate": 0.0024786706178867267, "loss": 1.2153, "step": 4013 }, { "epoch": 0.3519947823394247, "grad_norm": 0.0830078125, "learning_rate": 0.0024783487377753313, "loss": 1.2653, "step": 4014 }, { "epoch": 0.3520824741137993, "grad_norm": 0.11328125, "learning_rate": 0.00247802678212067, "loss": 1.2505, "step": 4015 }, { "epoch": 0.3521701658881738, "grad_norm": 0.125, "learning_rate": 0.002477704750952103, "loss": 1.253, "step": 4016 }, { "epoch": 0.35225785766254836, "grad_norm": 0.08837890625, "learning_rate": 0.0024773826442989998, "loss": 1.2526, "step": 4017 }, { "epoch": 0.3523455494369229, "grad_norm": 0.09326171875, "learning_rate": 0.0024770604621907354, "loss": 1.3298, "step": 4018 }, { "epoch": 0.35243324121129743, "grad_norm": 0.0693359375, "learning_rate": 0.002476738204656692, "loss": 1.2623, "step": 4019 }, { "epoch": 0.352520932985672, "grad_norm": 0.130859375, "learning_rate": 0.0024764158717262576, "loss": 1.2897, "step": 4020 }, { "epoch": 0.3526086247600465, "grad_norm": 0.06103515625, "learning_rate": 0.0024760934634288304, "loss": 1.2636, "step": 4021 }, { "epoch": 0.3526963165344211, "grad_norm": 0.134765625, "learning_rate": 0.002475770979793811, "loss": 1.1908, "step": 4022 }, { "epoch": 0.35278400830879564, "grad_norm": 0.060546875, "learning_rate": 0.0024754484208506103, "loss": 1.2086, "step": 4023 }, { "epoch": 0.35287170008317015, "grad_norm": 0.125, "learning_rate": 0.0024751257866286445, "loss": 1.2736, "step": 4024 }, { "epoch": 0.3529593918575447, "grad_norm": 0.072265625, "learning_rate": 0.0024748030771573368, "loss": 1.2208, "step": 4025 }, { "epoch": 0.3530470836319193, "grad_norm": 0.0986328125, "learning_rate": 0.002474480292466118, "loss": 1.2159, "step": 4026 }, { "epoch": 0.3531347754062938, "grad_norm": 0.09814453125, "learning_rate": 0.0024741574325844247, "loss": 1.22, "step": 4027 }, { "epoch": 0.35322246718066835, "grad_norm": 0.0693359375, "learning_rate": 0.002473834497541701, "loss": 1.242, "step": 4028 }, { "epoch": 0.3533101589550429, "grad_norm": 0.11474609375, "learning_rate": 0.002473511487367398, "loss": 1.2504, "step": 4029 }, { "epoch": 0.35339785072941743, "grad_norm": 0.059326171875, "learning_rate": 0.0024731884020909737, "loss": 1.1998, "step": 4030 }, { "epoch": 0.353485542503792, "grad_norm": 0.05908203125, "learning_rate": 0.0024728652417418916, "loss": 1.2476, "step": 4031 }, { "epoch": 0.3535732342781665, "grad_norm": 0.053466796875, "learning_rate": 0.0024725420063496234, "loss": 1.2138, "step": 4032 }, { "epoch": 0.35366092605254107, "grad_norm": 0.0732421875, "learning_rate": 0.002472218695943648, "loss": 1.2296, "step": 4033 }, { "epoch": 0.35374861782691563, "grad_norm": 0.057861328125, "learning_rate": 0.00247189531055345, "loss": 1.2094, "step": 4034 }, { "epoch": 0.35383630960129014, "grad_norm": 0.0634765625, "learning_rate": 0.0024715718502085213, "loss": 1.2211, "step": 4035 }, { "epoch": 0.3539240013756647, "grad_norm": 0.095703125, "learning_rate": 0.0024712483149383615, "loss": 1.2155, "step": 4036 }, { "epoch": 0.3540116931500393, "grad_norm": 0.07177734375, "learning_rate": 0.002470924704772474, "loss": 1.2726, "step": 4037 }, { "epoch": 0.3540993849244138, "grad_norm": 0.123046875, "learning_rate": 0.002470601019740374, "loss": 1.1678, "step": 4038 }, { "epoch": 0.35418707669878835, "grad_norm": 0.11474609375, "learning_rate": 0.002470277259871579, "loss": 1.2479, "step": 4039 }, { "epoch": 0.3542747684731629, "grad_norm": 0.08935546875, "learning_rate": 0.002469953425195615, "loss": 1.2631, "step": 4040 }, { "epoch": 0.3543624602475374, "grad_norm": 0.1474609375, "learning_rate": 0.002469629515742015, "loss": 1.2269, "step": 4041 }, { "epoch": 0.354450152021912, "grad_norm": 0.0947265625, "learning_rate": 0.00246930553154032, "loss": 1.2761, "step": 4042 }, { "epoch": 0.3545378437962865, "grad_norm": 0.16796875, "learning_rate": 0.0024689814726200753, "loss": 1.2444, "step": 4043 }, { "epoch": 0.35462553557066107, "grad_norm": 0.06298828125, "learning_rate": 0.002468657339010834, "loss": 1.2283, "step": 4044 }, { "epoch": 0.35471322734503563, "grad_norm": 0.14453125, "learning_rate": 0.002468333130742157, "loss": 1.2291, "step": 4045 }, { "epoch": 0.35480091911941014, "grad_norm": 0.06396484375, "learning_rate": 0.0024680088478436116, "loss": 1.2712, "step": 4046 }, { "epoch": 0.3548886108937847, "grad_norm": 0.087890625, "learning_rate": 0.00246768449034477, "loss": 1.1907, "step": 4047 }, { "epoch": 0.35497630266815927, "grad_norm": 0.0546875, "learning_rate": 0.002467360058275214, "loss": 1.2553, "step": 4048 }, { "epoch": 0.3550639944425338, "grad_norm": 0.057861328125, "learning_rate": 0.002467035551664531, "loss": 1.2439, "step": 4049 }, { "epoch": 0.35515168621690835, "grad_norm": 0.0615234375, "learning_rate": 0.0024667109705423145, "loss": 1.2431, "step": 4050 }, { "epoch": 0.3552393779912829, "grad_norm": 0.07763671875, "learning_rate": 0.002466386314938166, "loss": 1.223, "step": 4051 }, { "epoch": 0.3553270697656574, "grad_norm": 0.062255859375, "learning_rate": 0.0024660615848816933, "loss": 1.2301, "step": 4052 }, { "epoch": 0.355414761540032, "grad_norm": 0.072265625, "learning_rate": 0.0024657367804025105, "loss": 1.2424, "step": 4053 }, { "epoch": 0.35550245331440655, "grad_norm": 0.0712890625, "learning_rate": 0.002465411901530239, "loss": 1.2089, "step": 4054 }, { "epoch": 0.35559014508878106, "grad_norm": 0.076171875, "learning_rate": 0.002465086948294507, "loss": 1.2503, "step": 4055 }, { "epoch": 0.3556778368631556, "grad_norm": 0.068359375, "learning_rate": 0.00246476192072495, "loss": 1.214, "step": 4056 }, { "epoch": 0.35576552863753014, "grad_norm": 0.08349609375, "learning_rate": 0.0024644368188512086, "loss": 1.2007, "step": 4057 }, { "epoch": 0.3558532204119047, "grad_norm": 0.0673828125, "learning_rate": 0.0024641116427029314, "loss": 1.1817, "step": 4058 }, { "epoch": 0.35594091218627927, "grad_norm": 0.07080078125, "learning_rate": 0.0024637863923097753, "loss": 1.2394, "step": 4059 }, { "epoch": 0.3560286039606538, "grad_norm": 0.10986328125, "learning_rate": 0.0024634610677013997, "loss": 1.2344, "step": 4060 }, { "epoch": 0.35611629573502834, "grad_norm": 0.064453125, "learning_rate": 0.0024631356689074754, "loss": 1.1813, "step": 4061 }, { "epoch": 0.3562039875094029, "grad_norm": 0.10595703125, "learning_rate": 0.002462810195957677, "loss": 1.2354, "step": 4062 }, { "epoch": 0.3562916792837774, "grad_norm": 0.08251953125, "learning_rate": 0.002462484648881687, "loss": 1.2346, "step": 4063 }, { "epoch": 0.356379371058152, "grad_norm": 0.06298828125, "learning_rate": 0.002462159027709194, "loss": 1.2428, "step": 4064 }, { "epoch": 0.35646706283252655, "grad_norm": 0.0751953125, "learning_rate": 0.0024618333324698954, "loss": 1.2804, "step": 4065 }, { "epoch": 0.35655475460690106, "grad_norm": 0.0791015625, "learning_rate": 0.0024615075631934913, "loss": 1.2782, "step": 4066 }, { "epoch": 0.3566424463812756, "grad_norm": 0.07421875, "learning_rate": 0.002461181719909693, "loss": 1.2567, "step": 4067 }, { "epoch": 0.35673013815565013, "grad_norm": 0.07470703125, "learning_rate": 0.0024608558026482158, "loss": 1.2578, "step": 4068 }, { "epoch": 0.3568178299300247, "grad_norm": 0.123046875, "learning_rate": 0.0024605298114387824, "loss": 1.2055, "step": 4069 }, { "epoch": 0.35690552170439926, "grad_norm": 0.06640625, "learning_rate": 0.002460203746311123, "loss": 1.2151, "step": 4070 }, { "epoch": 0.3569932134787738, "grad_norm": 0.1337890625, "learning_rate": 0.0024598776072949735, "loss": 1.2734, "step": 4071 }, { "epoch": 0.35708090525314834, "grad_norm": 0.07177734375, "learning_rate": 0.002459551394420077, "loss": 1.1889, "step": 4072 }, { "epoch": 0.3571685970275229, "grad_norm": 0.150390625, "learning_rate": 0.0024592251077161825, "loss": 1.2128, "step": 4073 }, { "epoch": 0.3572562888018974, "grad_norm": 0.1533203125, "learning_rate": 0.0024588987472130472, "loss": 1.249, "step": 4074 }, { "epoch": 0.357343980576272, "grad_norm": 0.05615234375, "learning_rate": 0.002458572312940435, "loss": 1.2933, "step": 4075 }, { "epoch": 0.35743167235064655, "grad_norm": 0.1142578125, "learning_rate": 0.0024582458049281146, "loss": 1.1748, "step": 4076 }, { "epoch": 0.35751936412502106, "grad_norm": 0.06396484375, "learning_rate": 0.0024579192232058638, "loss": 1.284, "step": 4077 }, { "epoch": 0.3576070558993956, "grad_norm": 0.1669921875, "learning_rate": 0.002457592567803465, "loss": 1.2144, "step": 4078 }, { "epoch": 0.35769474767377013, "grad_norm": 0.072265625, "learning_rate": 0.002457265838750709, "loss": 1.2689, "step": 4079 }, { "epoch": 0.3577824394481447, "grad_norm": 0.1787109375, "learning_rate": 0.002456939036077392, "loss": 1.2483, "step": 4080 }, { "epoch": 0.35787013122251926, "grad_norm": 0.06005859375, "learning_rate": 0.0024566121598133183, "loss": 1.1807, "step": 4081 }, { "epoch": 0.35795782299689377, "grad_norm": 0.1103515625, "learning_rate": 0.0024562852099882983, "loss": 1.2576, "step": 4082 }, { "epoch": 0.35804551477126834, "grad_norm": 0.0947265625, "learning_rate": 0.002455958186632148, "loss": 1.247, "step": 4083 }, { "epoch": 0.3581332065456429, "grad_norm": 0.10009765625, "learning_rate": 0.0024556310897746922, "loss": 1.2679, "step": 4084 }, { "epoch": 0.3582208983200174, "grad_norm": 0.1376953125, "learning_rate": 0.0024553039194457602, "loss": 1.1838, "step": 4085 }, { "epoch": 0.358308590094392, "grad_norm": 0.11865234375, "learning_rate": 0.00245497667567519, "loss": 1.2438, "step": 4086 }, { "epoch": 0.35839628186876654, "grad_norm": 0.1005859375, "learning_rate": 0.002454649358492825, "loss": 1.21, "step": 4087 }, { "epoch": 0.35848397364314105, "grad_norm": 0.08642578125, "learning_rate": 0.0024543219679285153, "loss": 1.2072, "step": 4088 }, { "epoch": 0.3585716654175156, "grad_norm": 0.0693359375, "learning_rate": 0.002453994504012119, "loss": 1.264, "step": 4089 }, { "epoch": 0.3586593571918902, "grad_norm": 0.068359375, "learning_rate": 0.002453666966773499, "loss": 1.151, "step": 4090 }, { "epoch": 0.3587470489662647, "grad_norm": 0.0859375, "learning_rate": 0.002453339356242527, "loss": 1.2501, "step": 4091 }, { "epoch": 0.35883474074063926, "grad_norm": 0.0986328125, "learning_rate": 0.002453011672449079, "loss": 1.2275, "step": 4092 }, { "epoch": 0.35892243251501377, "grad_norm": 0.1318359375, "learning_rate": 0.0024526839154230392, "loss": 1.2633, "step": 4093 }, { "epoch": 0.35901012428938833, "grad_norm": 0.0703125, "learning_rate": 0.0024523560851942984, "loss": 1.2741, "step": 4094 }, { "epoch": 0.3590978160637629, "grad_norm": 0.06298828125, "learning_rate": 0.0024520281817927542, "loss": 1.2424, "step": 4095 }, { "epoch": 0.3591855078381374, "grad_norm": 0.099609375, "learning_rate": 0.0024517002052483103, "loss": 1.2034, "step": 4096 }, { "epoch": 0.359273199612512, "grad_norm": 0.05859375, "learning_rate": 0.0024513721555908773, "loss": 1.1817, "step": 4097 }, { "epoch": 0.35936089138688654, "grad_norm": 0.0634765625, "learning_rate": 0.002451044032850372, "loss": 1.2916, "step": 4098 }, { "epoch": 0.35944858316126105, "grad_norm": 0.0576171875, "learning_rate": 0.0024507158370567195, "loss": 1.1975, "step": 4099 }, { "epoch": 0.3595362749356356, "grad_norm": 0.06982421875, "learning_rate": 0.0024503875682398493, "loss": 1.1979, "step": 4100 }, { "epoch": 0.3596239667100102, "grad_norm": 0.1025390625, "learning_rate": 0.002450059226429699, "loss": 1.1958, "step": 4101 }, { "epoch": 0.3597116584843847, "grad_norm": 0.0625, "learning_rate": 0.002449730811656213, "loss": 1.2397, "step": 4102 }, { "epoch": 0.35979935025875925, "grad_norm": 0.08740234375, "learning_rate": 0.0024494023239493407, "loss": 1.1891, "step": 4103 }, { "epoch": 0.35988704203313376, "grad_norm": 0.06103515625, "learning_rate": 0.002449073763339041, "loss": 1.2399, "step": 4104 }, { "epoch": 0.35997473380750833, "grad_norm": 0.07177734375, "learning_rate": 0.0024487451298552763, "loss": 1.2502, "step": 4105 }, { "epoch": 0.3600624255818829, "grad_norm": 0.09033203125, "learning_rate": 0.002448416423528018, "loss": 1.2331, "step": 4106 }, { "epoch": 0.3601501173562574, "grad_norm": 0.0849609375, "learning_rate": 0.0024480876443872426, "loss": 1.1849, "step": 4107 }, { "epoch": 0.36023780913063197, "grad_norm": 0.123046875, "learning_rate": 0.002447758792462934, "loss": 1.2439, "step": 4108 }, { "epoch": 0.36032550090500653, "grad_norm": 0.126953125, "learning_rate": 0.002447429867785084, "loss": 1.2655, "step": 4109 }, { "epoch": 0.36041319267938104, "grad_norm": 0.0888671875, "learning_rate": 0.0024471008703836877, "loss": 1.1895, "step": 4110 }, { "epoch": 0.3605008844537556, "grad_norm": 0.0869140625, "learning_rate": 0.00244677180028875, "loss": 1.2618, "step": 4111 }, { "epoch": 0.3605885762281302, "grad_norm": 0.12060546875, "learning_rate": 0.002446442657530281, "loss": 1.2265, "step": 4112 }, { "epoch": 0.3606762680025047, "grad_norm": 0.09521484375, "learning_rate": 0.0024461134421382975, "loss": 1.1804, "step": 4113 }, { "epoch": 0.36076395977687925, "grad_norm": 0.119140625, "learning_rate": 0.002445784154142824, "loss": 1.2288, "step": 4114 }, { "epoch": 0.36085165155125376, "grad_norm": 0.0859375, "learning_rate": 0.0024454547935738894, "loss": 1.2003, "step": 4115 }, { "epoch": 0.3609393433256283, "grad_norm": 0.10791015625, "learning_rate": 0.002445125360461531, "loss": 1.2285, "step": 4116 }, { "epoch": 0.3610270351000029, "grad_norm": 0.057861328125, "learning_rate": 0.002444795854835792, "loss": 1.2231, "step": 4117 }, { "epoch": 0.3611147268743774, "grad_norm": 0.10302734375, "learning_rate": 0.0024444662767267233, "loss": 1.178, "step": 4118 }, { "epoch": 0.36120241864875197, "grad_norm": 0.060546875, "learning_rate": 0.002444136626164381, "loss": 1.218, "step": 4119 }, { "epoch": 0.36129011042312653, "grad_norm": 0.06982421875, "learning_rate": 0.002443806903178829, "loss": 1.2378, "step": 4120 }, { "epoch": 0.36137780219750104, "grad_norm": 0.061279296875, "learning_rate": 0.0024434771078001355, "loss": 1.2145, "step": 4121 }, { "epoch": 0.3614654939718756, "grad_norm": 0.08837890625, "learning_rate": 0.0024431472400583787, "loss": 1.2082, "step": 4122 }, { "epoch": 0.36155318574625017, "grad_norm": 0.057373046875, "learning_rate": 0.0024428172999836407, "loss": 1.1719, "step": 4123 }, { "epoch": 0.3616408775206247, "grad_norm": 0.0830078125, "learning_rate": 0.002442487287606012, "loss": 1.2238, "step": 4124 }, { "epoch": 0.36172856929499925, "grad_norm": 0.07080078125, "learning_rate": 0.002442157202955588, "loss": 1.2641, "step": 4125 }, { "epoch": 0.3618162610693738, "grad_norm": 0.055908203125, "learning_rate": 0.002441827046062473, "loss": 1.2614, "step": 4126 }, { "epoch": 0.3619039528437483, "grad_norm": 0.0771484375, "learning_rate": 0.002441496816956775, "loss": 1.2982, "step": 4127 }, { "epoch": 0.3619916446181229, "grad_norm": 0.0712890625, "learning_rate": 0.00244116651566861, "loss": 1.1969, "step": 4128 }, { "epoch": 0.3620793363924974, "grad_norm": 0.130859375, "learning_rate": 0.0024408361422281017, "loss": 1.3066, "step": 4129 }, { "epoch": 0.36216702816687196, "grad_norm": 0.1611328125, "learning_rate": 0.0024405056966653785, "loss": 1.2297, "step": 4130 }, { "epoch": 0.36225471994124653, "grad_norm": 0.142578125, "learning_rate": 0.002440175179010577, "loss": 1.2664, "step": 4131 }, { "epoch": 0.36234241171562104, "grad_norm": 0.2060546875, "learning_rate": 0.002439844589293838, "loss": 1.2321, "step": 4132 }, { "epoch": 0.3624301034899956, "grad_norm": 0.13671875, "learning_rate": 0.0024395139275453126, "loss": 1.1562, "step": 4133 }, { "epoch": 0.36251779526437017, "grad_norm": 0.1630859375, "learning_rate": 0.0024391831937951546, "loss": 1.2472, "step": 4134 }, { "epoch": 0.3626054870387447, "grad_norm": 0.125, "learning_rate": 0.002438852388073526, "loss": 1.1922, "step": 4135 }, { "epoch": 0.36269317881311924, "grad_norm": 0.0751953125, "learning_rate": 0.002438521510410597, "loss": 1.2467, "step": 4136 }, { "epoch": 0.3627808705874938, "grad_norm": 0.181640625, "learning_rate": 0.002438190560836541, "loss": 1.2789, "step": 4137 }, { "epoch": 0.3628685623618683, "grad_norm": 0.08203125, "learning_rate": 0.0024378595393815413, "loss": 1.2012, "step": 4138 }, { "epoch": 0.3629562541362429, "grad_norm": 0.13671875, "learning_rate": 0.002437528446075785, "loss": 1.2478, "step": 4139 }, { "epoch": 0.3630439459106174, "grad_norm": 0.0556640625, "learning_rate": 0.0024371972809494676, "loss": 1.2276, "step": 4140 }, { "epoch": 0.36313163768499196, "grad_norm": 0.10986328125, "learning_rate": 0.00243686604403279, "loss": 1.2514, "step": 4141 }, { "epoch": 0.3632193294593665, "grad_norm": 0.072265625, "learning_rate": 0.002436534735355961, "loss": 1.2552, "step": 4142 }, { "epoch": 0.36330702123374103, "grad_norm": 0.06591796875, "learning_rate": 0.0024362033549491942, "loss": 1.2463, "step": 4143 }, { "epoch": 0.3633947130081156, "grad_norm": 0.076171875, "learning_rate": 0.0024358719028427112, "loss": 1.2228, "step": 4144 }, { "epoch": 0.36348240478249016, "grad_norm": 0.06396484375, "learning_rate": 0.0024355403790667393, "loss": 1.2576, "step": 4145 }, { "epoch": 0.3635700965568647, "grad_norm": 0.0634765625, "learning_rate": 0.0024352087836515125, "loss": 1.2173, "step": 4146 }, { "epoch": 0.36365778833123924, "grad_norm": 0.052490234375, "learning_rate": 0.0024348771166272714, "loss": 1.2169, "step": 4147 }, { "epoch": 0.3637454801056138, "grad_norm": 0.07470703125, "learning_rate": 0.0024345453780242644, "loss": 1.2968, "step": 4148 }, { "epoch": 0.3638331718799883, "grad_norm": 0.0537109375, "learning_rate": 0.0024342135678727434, "loss": 1.1889, "step": 4149 }, { "epoch": 0.3639208636543629, "grad_norm": 0.06201171875, "learning_rate": 0.0024338816862029696, "loss": 1.2823, "step": 4150 }, { "epoch": 0.3640085554287374, "grad_norm": 0.10888671875, "learning_rate": 0.0024335497330452097, "loss": 1.2502, "step": 4151 }, { "epoch": 0.36409624720311196, "grad_norm": 0.05615234375, "learning_rate": 0.0024332177084297364, "loss": 1.2259, "step": 4152 }, { "epoch": 0.3641839389774865, "grad_norm": 0.057373046875, "learning_rate": 0.0024328856123868306, "loss": 1.23, "step": 4153 }, { "epoch": 0.36427163075186103, "grad_norm": 0.080078125, "learning_rate": 0.002432553444946778, "loss": 1.2687, "step": 4154 }, { "epoch": 0.3643593225262356, "grad_norm": 0.05908203125, "learning_rate": 0.002432221206139871, "loss": 1.2651, "step": 4155 }, { "epoch": 0.36444701430061016, "grad_norm": 0.06494140625, "learning_rate": 0.0024318888959964096, "loss": 1.2028, "step": 4156 }, { "epoch": 0.36453470607498467, "grad_norm": 0.06884765625, "learning_rate": 0.0024315565145466987, "loss": 1.3254, "step": 4157 }, { "epoch": 0.36462239784935924, "grad_norm": 0.10791015625, "learning_rate": 0.002431224061821052, "loss": 1.2593, "step": 4158 }, { "epoch": 0.3647100896237338, "grad_norm": 0.0908203125, "learning_rate": 0.0024308915378497874, "loss": 1.2754, "step": 4159 }, { "epoch": 0.3647977813981083, "grad_norm": 0.08447265625, "learning_rate": 0.0024305589426632302, "loss": 1.2061, "step": 4160 }, { "epoch": 0.3648854731724829, "grad_norm": 0.08056640625, "learning_rate": 0.0024302262762917123, "loss": 1.2514, "step": 4161 }, { "epoch": 0.3649731649468574, "grad_norm": 0.06591796875, "learning_rate": 0.002429893538765572, "loss": 1.2475, "step": 4162 }, { "epoch": 0.36506085672123195, "grad_norm": 0.056640625, "learning_rate": 0.002429560730115155, "loss": 1.2831, "step": 4163 }, { "epoch": 0.3651485484956065, "grad_norm": 0.0654296875, "learning_rate": 0.002429227850370811, "loss": 1.2244, "step": 4164 }, { "epoch": 0.365236240269981, "grad_norm": 0.06640625, "learning_rate": 0.0024288948995628994, "loss": 1.261, "step": 4165 }, { "epoch": 0.3653239320443556, "grad_norm": 0.060791015625, "learning_rate": 0.0024285618777217834, "loss": 1.3021, "step": 4166 }, { "epoch": 0.36541162381873016, "grad_norm": 0.07080078125, "learning_rate": 0.002428228784877834, "loss": 1.1748, "step": 4167 }, { "epoch": 0.36549931559310467, "grad_norm": 0.0751953125, "learning_rate": 0.002427895621061428, "loss": 1.2323, "step": 4168 }, { "epoch": 0.36558700736747923, "grad_norm": 0.06201171875, "learning_rate": 0.00242756238630295, "loss": 1.17, "step": 4169 }, { "epoch": 0.3656746991418538, "grad_norm": 0.09326171875, "learning_rate": 0.002427229080632789, "loss": 1.2616, "step": 4170 }, { "epoch": 0.3657623909162283, "grad_norm": 0.059326171875, "learning_rate": 0.002426895704081343, "loss": 1.2038, "step": 4171 }, { "epoch": 0.3658500826906029, "grad_norm": 0.068359375, "learning_rate": 0.002426562256679014, "loss": 1.3371, "step": 4172 }, { "epoch": 0.36593777446497744, "grad_norm": 0.0537109375, "learning_rate": 0.002426228738456212, "loss": 1.2284, "step": 4173 }, { "epoch": 0.36602546623935195, "grad_norm": 0.1298828125, "learning_rate": 0.0024258951494433526, "loss": 1.2003, "step": 4174 }, { "epoch": 0.3661131580137265, "grad_norm": 0.10107421875, "learning_rate": 0.0024255614896708587, "loss": 1.275, "step": 4175 }, { "epoch": 0.366200849788101, "grad_norm": 0.076171875, "learning_rate": 0.0024252277591691592, "loss": 1.2514, "step": 4176 }, { "epoch": 0.3662885415624756, "grad_norm": 0.0732421875, "learning_rate": 0.002424893957968689, "loss": 1.1947, "step": 4177 }, { "epoch": 0.36637623333685015, "grad_norm": 0.060302734375, "learning_rate": 0.00242456008609989, "loss": 1.2662, "step": 4178 }, { "epoch": 0.36646392511122466, "grad_norm": 0.050537109375, "learning_rate": 0.0024242261435932114, "loss": 1.2162, "step": 4179 }, { "epoch": 0.36655161688559923, "grad_norm": 0.05712890625, "learning_rate": 0.0024238921304791067, "loss": 1.2248, "step": 4180 }, { "epoch": 0.3666393086599738, "grad_norm": 0.053466796875, "learning_rate": 0.0024235580467880376, "loss": 1.2357, "step": 4181 }, { "epoch": 0.3667270004343483, "grad_norm": 0.056884765625, "learning_rate": 0.002423223892550472, "loss": 1.219, "step": 4182 }, { "epoch": 0.36681469220872287, "grad_norm": 0.053955078125, "learning_rate": 0.0024228896677968826, "loss": 1.251, "step": 4183 }, { "epoch": 0.36690238398309744, "grad_norm": 0.0771484375, "learning_rate": 0.002422555372557751, "loss": 1.2522, "step": 4184 }, { "epoch": 0.36699007575747195, "grad_norm": 0.06640625, "learning_rate": 0.0024222210068635635, "loss": 1.2046, "step": 4185 }, { "epoch": 0.3670777675318465, "grad_norm": 0.057373046875, "learning_rate": 0.0024218865707448145, "loss": 1.2505, "step": 4186 }, { "epoch": 0.367165459306221, "grad_norm": 0.06494140625, "learning_rate": 0.0024215520642320025, "loss": 1.1666, "step": 4187 }, { "epoch": 0.3672531510805956, "grad_norm": 0.1259765625, "learning_rate": 0.0024212174873556335, "loss": 1.2288, "step": 4188 }, { "epoch": 0.36734084285497015, "grad_norm": 0.07080078125, "learning_rate": 0.002420882840146221, "loss": 1.2996, "step": 4189 }, { "epoch": 0.36742853462934466, "grad_norm": 0.09912109375, "learning_rate": 0.0024205481226342836, "loss": 1.229, "step": 4190 }, { "epoch": 0.3675162264037192, "grad_norm": 0.0634765625, "learning_rate": 0.0024202133348503463, "loss": 1.1714, "step": 4191 }, { "epoch": 0.3676039181780938, "grad_norm": 0.11669921875, "learning_rate": 0.002419878476824942, "loss": 1.2343, "step": 4192 }, { "epoch": 0.3676916099524683, "grad_norm": 0.0673828125, "learning_rate": 0.0024195435485886065, "loss": 1.264, "step": 4193 }, { "epoch": 0.36777930172684287, "grad_norm": 0.09423828125, "learning_rate": 0.0024192085501718867, "loss": 1.2988, "step": 4194 }, { "epoch": 0.36786699350121743, "grad_norm": 0.1396484375, "learning_rate": 0.002418873481605333, "loss": 1.2467, "step": 4195 }, { "epoch": 0.36795468527559194, "grad_norm": 0.0771484375, "learning_rate": 0.0024185383429195027, "loss": 1.2346, "step": 4196 }, { "epoch": 0.3680423770499665, "grad_norm": 0.08447265625, "learning_rate": 0.0024182031341449593, "loss": 1.2611, "step": 4197 }, { "epoch": 0.368130068824341, "grad_norm": 0.060302734375, "learning_rate": 0.0024178678553122735, "loss": 1.2263, "step": 4198 }, { "epoch": 0.3682177605987156, "grad_norm": 0.07958984375, "learning_rate": 0.002417532506452021, "loss": 1.2405, "step": 4199 }, { "epoch": 0.36830545237309015, "grad_norm": 0.07177734375, "learning_rate": 0.0024171970875947854, "loss": 1.2647, "step": 4200 }, { "epoch": 0.36839314414746466, "grad_norm": 0.0546875, "learning_rate": 0.0024168615987711557, "loss": 1.2278, "step": 4201 }, { "epoch": 0.3684808359218392, "grad_norm": 0.06103515625, "learning_rate": 0.0024165260400117283, "loss": 1.2645, "step": 4202 }, { "epoch": 0.3685685276962138, "grad_norm": 0.072265625, "learning_rate": 0.0024161904113471043, "loss": 1.2453, "step": 4203 }, { "epoch": 0.3686562194705883, "grad_norm": 0.08544921875, "learning_rate": 0.0024158547128078933, "loss": 1.1918, "step": 4204 }, { "epoch": 0.36874391124496286, "grad_norm": 0.08056640625, "learning_rate": 0.002415518944424709, "loss": 1.2471, "step": 4205 }, { "epoch": 0.36883160301933743, "grad_norm": 0.09619140625, "learning_rate": 0.002415183106228174, "loss": 1.1988, "step": 4206 }, { "epoch": 0.36891929479371194, "grad_norm": 0.07568359375, "learning_rate": 0.0024148471982489146, "loss": 1.2654, "step": 4207 }, { "epoch": 0.3690069865680865, "grad_norm": 0.0771484375, "learning_rate": 0.0024145112205175645, "loss": 1.2561, "step": 4208 }, { "epoch": 0.36909467834246107, "grad_norm": 0.06787109375, "learning_rate": 0.0024141751730647654, "loss": 1.2271, "step": 4209 }, { "epoch": 0.3691823701168356, "grad_norm": 0.07080078125, "learning_rate": 0.0024138390559211628, "loss": 1.2972, "step": 4210 }, { "epoch": 0.36927006189121014, "grad_norm": 0.06103515625, "learning_rate": 0.0024135028691174105, "loss": 1.2858, "step": 4211 }, { "epoch": 0.36935775366558465, "grad_norm": 0.07373046875, "learning_rate": 0.0024131666126841673, "loss": 1.1704, "step": 4212 }, { "epoch": 0.3694454454399592, "grad_norm": 0.09423828125, "learning_rate": 0.002412830286652099, "loss": 1.201, "step": 4213 }, { "epoch": 0.3695331372143338, "grad_norm": 0.07373046875, "learning_rate": 0.0024124938910518785, "loss": 1.2114, "step": 4214 }, { "epoch": 0.3696208289887083, "grad_norm": 0.08935546875, "learning_rate": 0.002412157425914183, "loss": 1.2654, "step": 4215 }, { "epoch": 0.36970852076308286, "grad_norm": 0.06103515625, "learning_rate": 0.0024118208912696977, "loss": 1.2176, "step": 4216 }, { "epoch": 0.3697962125374574, "grad_norm": 0.0849609375, "learning_rate": 0.0024114842871491137, "loss": 1.2308, "step": 4217 }, { "epoch": 0.36988390431183193, "grad_norm": 0.0751953125, "learning_rate": 0.0024111476135831285, "loss": 1.2151, "step": 4218 }, { "epoch": 0.3699715960862065, "grad_norm": 0.0634765625, "learning_rate": 0.002410810870602446, "loss": 1.3195, "step": 4219 }, { "epoch": 0.37005928786058107, "grad_norm": 0.06787109375, "learning_rate": 0.002410474058237776, "loss": 1.2192, "step": 4220 }, { "epoch": 0.3701469796349556, "grad_norm": 0.07080078125, "learning_rate": 0.002410137176519835, "loss": 1.1865, "step": 4221 }, { "epoch": 0.37023467140933014, "grad_norm": 0.0576171875, "learning_rate": 0.002409800225479346, "loss": 1.2827, "step": 4222 }, { "epoch": 0.37032236318370465, "grad_norm": 0.0859375, "learning_rate": 0.0024094632051470375, "loss": 1.2497, "step": 4223 }, { "epoch": 0.3704100549580792, "grad_norm": 0.11572265625, "learning_rate": 0.002409126115553645, "loss": 1.2393, "step": 4224 }, { "epoch": 0.3704977467324538, "grad_norm": 0.053955078125, "learning_rate": 0.0024087889567299113, "loss": 1.2146, "step": 4225 }, { "epoch": 0.3705854385068283, "grad_norm": 0.10498046875, "learning_rate": 0.002408451728706583, "loss": 1.258, "step": 4226 }, { "epoch": 0.37067313028120286, "grad_norm": 0.12158203125, "learning_rate": 0.002408114431514415, "loss": 1.2406, "step": 4227 }, { "epoch": 0.3707608220555774, "grad_norm": 0.1416015625, "learning_rate": 0.0024077770651841682, "loss": 1.2481, "step": 4228 }, { "epoch": 0.37084851382995193, "grad_norm": 0.234375, "learning_rate": 0.002407439629746609, "loss": 1.2368, "step": 4229 }, { "epoch": 0.3709362056043265, "grad_norm": 0.181640625, "learning_rate": 0.0024071021252325105, "loss": 1.2432, "step": 4230 }, { "epoch": 0.37102389737870106, "grad_norm": 0.2119140625, "learning_rate": 0.0024067645516726533, "loss": 1.2295, "step": 4231 }, { "epoch": 0.37111158915307557, "grad_norm": 0.228515625, "learning_rate": 0.0024064269090978228, "loss": 1.332, "step": 4232 }, { "epoch": 0.37119928092745014, "grad_norm": 0.29296875, "learning_rate": 0.00240608919753881, "loss": 1.3393, "step": 4233 }, { "epoch": 0.37128697270182465, "grad_norm": 0.11865234375, "learning_rate": 0.002405751417026415, "loss": 1.273, "step": 4234 }, { "epoch": 0.3713746644761992, "grad_norm": 0.1689453125, "learning_rate": 0.002405413567591441, "loss": 1.2548, "step": 4235 }, { "epoch": 0.3714623562505738, "grad_norm": 0.173828125, "learning_rate": 0.0024050756492647, "loss": 1.2562, "step": 4236 }, { "epoch": 0.3715500480249483, "grad_norm": 0.061279296875, "learning_rate": 0.002404737662077009, "loss": 1.2574, "step": 4237 }, { "epoch": 0.37163773979932285, "grad_norm": 0.12890625, "learning_rate": 0.002404399606059192, "loss": 1.2304, "step": 4238 }, { "epoch": 0.3717254315736974, "grad_norm": 0.13671875, "learning_rate": 0.002404061481242078, "loss": 1.2387, "step": 4239 }, { "epoch": 0.37181312334807193, "grad_norm": 0.06982421875, "learning_rate": 0.0024037232876565036, "loss": 1.2365, "step": 4240 }, { "epoch": 0.3719008151224465, "grad_norm": 0.111328125, "learning_rate": 0.002403385025333311, "loss": 1.1844, "step": 4241 }, { "epoch": 0.37198850689682106, "grad_norm": 0.10693359375, "learning_rate": 0.0024030466943033495, "loss": 1.2766, "step": 4242 }, { "epoch": 0.37207619867119557, "grad_norm": 0.07666015625, "learning_rate": 0.002402708294597472, "loss": 1.1834, "step": 4243 }, { "epoch": 0.37216389044557013, "grad_norm": 0.20703125, "learning_rate": 0.0024023698262465422, "loss": 1.1967, "step": 4244 }, { "epoch": 0.3722515822199447, "grad_norm": 0.072265625, "learning_rate": 0.0024020312892814267, "loss": 1.175, "step": 4245 }, { "epoch": 0.3723392739943192, "grad_norm": 0.1455078125, "learning_rate": 0.0024016926837329986, "loss": 1.2375, "step": 4246 }, { "epoch": 0.3724269657686938, "grad_norm": 0.1328125, "learning_rate": 0.0024013540096321385, "loss": 1.2166, "step": 4247 }, { "epoch": 0.3725146575430683, "grad_norm": 0.08447265625, "learning_rate": 0.0024010152670097323, "loss": 1.2486, "step": 4248 }, { "epoch": 0.37260234931744285, "grad_norm": 0.17578125, "learning_rate": 0.002400676455896672, "loss": 1.2491, "step": 4249 }, { "epoch": 0.3726900410918174, "grad_norm": 0.0712890625, "learning_rate": 0.0024003375763238573, "loss": 1.2755, "step": 4250 }, { "epoch": 0.3727777328661919, "grad_norm": 0.19140625, "learning_rate": 0.0023999986283221924, "loss": 1.1997, "step": 4251 }, { "epoch": 0.3728654246405665, "grad_norm": 0.0791015625, "learning_rate": 0.002399659611922589, "loss": 1.2781, "step": 4252 }, { "epoch": 0.37295311641494105, "grad_norm": 0.1259765625, "learning_rate": 0.0023993205271559638, "loss": 1.2294, "step": 4253 }, { "epoch": 0.37304080818931556, "grad_norm": 0.1103515625, "learning_rate": 0.002398981374053241, "loss": 1.203, "step": 4254 }, { "epoch": 0.37312849996369013, "grad_norm": 0.10400390625, "learning_rate": 0.0023986421526453502, "loss": 1.2571, "step": 4255 }, { "epoch": 0.3732161917380647, "grad_norm": 0.07861328125, "learning_rate": 0.002398302862963228, "loss": 1.1867, "step": 4256 }, { "epoch": 0.3733038835124392, "grad_norm": 0.0703125, "learning_rate": 0.002397963505037816, "loss": 1.2107, "step": 4257 }, { "epoch": 0.37339157528681377, "grad_norm": 0.068359375, "learning_rate": 0.002397624078900064, "loss": 1.2421, "step": 4258 }, { "epoch": 0.3734792670611883, "grad_norm": 0.0634765625, "learning_rate": 0.0023972845845809257, "loss": 1.2673, "step": 4259 }, { "epoch": 0.37356695883556285, "grad_norm": 0.05615234375, "learning_rate": 0.002396945022111362, "loss": 1.2635, "step": 4260 }, { "epoch": 0.3736546506099374, "grad_norm": 0.0634765625, "learning_rate": 0.0023966053915223406, "loss": 1.2156, "step": 4261 }, { "epoch": 0.3737423423843119, "grad_norm": 0.0654296875, "learning_rate": 0.002396265692844835, "loss": 1.1726, "step": 4262 }, { "epoch": 0.3738300341586865, "grad_norm": 0.060546875, "learning_rate": 0.002395925926109825, "loss": 1.3187, "step": 4263 }, { "epoch": 0.37391772593306105, "grad_norm": 0.07470703125, "learning_rate": 0.0023955860913482956, "loss": 1.2546, "step": 4264 }, { "epoch": 0.37400541770743556, "grad_norm": 0.051513671875, "learning_rate": 0.0023952461885912396, "loss": 1.2549, "step": 4265 }, { "epoch": 0.3740931094818101, "grad_norm": 0.078125, "learning_rate": 0.002394906217869655, "loss": 1.2804, "step": 4266 }, { "epoch": 0.3741808012561847, "grad_norm": 0.057373046875, "learning_rate": 0.0023945661792145463, "loss": 1.1857, "step": 4267 }, { "epoch": 0.3742684930305592, "grad_norm": 0.07275390625, "learning_rate": 0.0023942260726569245, "loss": 1.2305, "step": 4268 }, { "epoch": 0.37435618480493377, "grad_norm": 0.05419921875, "learning_rate": 0.002393885898227805, "loss": 1.2274, "step": 4269 }, { "epoch": 0.3744438765793083, "grad_norm": 0.0673828125, "learning_rate": 0.0023935456559582135, "loss": 1.2099, "step": 4270 }, { "epoch": 0.37453156835368284, "grad_norm": 0.051025390625, "learning_rate": 0.0023932053458791762, "loss": 1.201, "step": 4271 }, { "epoch": 0.3746192601280574, "grad_norm": 0.055908203125, "learning_rate": 0.0023928649680217302, "loss": 1.2449, "step": 4272 }, { "epoch": 0.3747069519024319, "grad_norm": 0.0556640625, "learning_rate": 0.0023925245224169175, "loss": 1.2156, "step": 4273 }, { "epoch": 0.3747946436768065, "grad_norm": 0.057861328125, "learning_rate": 0.0023921840090957848, "loss": 1.2577, "step": 4274 }, { "epoch": 0.37488233545118105, "grad_norm": 0.0634765625, "learning_rate": 0.0023918434280893863, "loss": 1.241, "step": 4275 }, { "epoch": 0.37497002722555556, "grad_norm": 0.056884765625, "learning_rate": 0.0023915027794287826, "loss": 1.2092, "step": 4276 }, { "epoch": 0.3750577189999301, "grad_norm": 0.07666015625, "learning_rate": 0.002391162063145039, "loss": 1.2385, "step": 4277 }, { "epoch": 0.3751454107743047, "grad_norm": 0.06591796875, "learning_rate": 0.002390821279269229, "loss": 1.2399, "step": 4278 }, { "epoch": 0.3752331025486792, "grad_norm": 0.06689453125, "learning_rate": 0.002390480427832431, "loss": 1.2067, "step": 4279 }, { "epoch": 0.37532079432305376, "grad_norm": 0.10009765625, "learning_rate": 0.0023901395088657285, "loss": 1.2323, "step": 4280 }, { "epoch": 0.37540848609742833, "grad_norm": 0.078125, "learning_rate": 0.002389798522400214, "loss": 1.2105, "step": 4281 }, { "epoch": 0.37549617787180284, "grad_norm": 0.1279296875, "learning_rate": 0.0023894574684669846, "loss": 1.2275, "step": 4282 }, { "epoch": 0.3755838696461774, "grad_norm": 0.11279296875, "learning_rate": 0.0023891163470971424, "loss": 1.2727, "step": 4283 }, { "epoch": 0.3756715614205519, "grad_norm": 0.0849609375, "learning_rate": 0.0023887751583217976, "loss": 1.1941, "step": 4284 }, { "epoch": 0.3757592531949265, "grad_norm": 0.064453125, "learning_rate": 0.002388433902172065, "loss": 1.191, "step": 4285 }, { "epoch": 0.37584694496930104, "grad_norm": 0.06787109375, "learning_rate": 0.0023880925786790674, "loss": 1.2051, "step": 4286 }, { "epoch": 0.37593463674367555, "grad_norm": 0.08349609375, "learning_rate": 0.0023877511878739317, "loss": 1.2768, "step": 4287 }, { "epoch": 0.3760223285180501, "grad_norm": 0.056640625, "learning_rate": 0.0023874097297877923, "loss": 1.2653, "step": 4288 }, { "epoch": 0.3761100202924247, "grad_norm": 0.1162109375, "learning_rate": 0.002387068204451789, "loss": 1.2395, "step": 4289 }, { "epoch": 0.3761977120667992, "grad_norm": 0.06640625, "learning_rate": 0.002386726611897068, "loss": 1.2364, "step": 4290 }, { "epoch": 0.37628540384117376, "grad_norm": 0.057373046875, "learning_rate": 0.0023863849521547826, "loss": 1.2165, "step": 4291 }, { "epoch": 0.3763730956155483, "grad_norm": 0.061279296875, "learning_rate": 0.0023860432252560903, "loss": 1.2036, "step": 4292 }, { "epoch": 0.37646078738992284, "grad_norm": 0.0791015625, "learning_rate": 0.002385701431232155, "loss": 1.327, "step": 4293 }, { "epoch": 0.3765484791642974, "grad_norm": 0.1298828125, "learning_rate": 0.0023853595701141496, "loss": 1.2094, "step": 4294 }, { "epoch": 0.3766361709386719, "grad_norm": 0.06689453125, "learning_rate": 0.0023850176419332493, "loss": 1.2244, "step": 4295 }, { "epoch": 0.3767238627130465, "grad_norm": 0.08642578125, "learning_rate": 0.0023846756467206373, "loss": 1.2491, "step": 4296 }, { "epoch": 0.37681155448742104, "grad_norm": 0.0771484375, "learning_rate": 0.0023843335845075025, "loss": 1.2624, "step": 4297 }, { "epoch": 0.37689924626179555, "grad_norm": 0.0712890625, "learning_rate": 0.002383991455325041, "loss": 1.1655, "step": 4298 }, { "epoch": 0.3769869380361701, "grad_norm": 0.10888671875, "learning_rate": 0.0023836492592044533, "loss": 1.2803, "step": 4299 }, { "epoch": 0.3770746298105447, "grad_norm": 0.095703125, "learning_rate": 0.0023833069961769478, "loss": 1.2761, "step": 4300 }, { "epoch": 0.3771623215849192, "grad_norm": 0.10205078125, "learning_rate": 0.0023829646662737367, "loss": 1.1697, "step": 4301 }, { "epoch": 0.37725001335929376, "grad_norm": 0.08447265625, "learning_rate": 0.0023826222695260395, "loss": 1.2677, "step": 4302 }, { "epoch": 0.3773377051336683, "grad_norm": 0.1513671875, "learning_rate": 0.0023822798059650835, "loss": 1.2454, "step": 4303 }, { "epoch": 0.37742539690804283, "grad_norm": 0.06298828125, "learning_rate": 0.0023819372756220996, "loss": 1.2133, "step": 4304 }, { "epoch": 0.3775130886824174, "grad_norm": 0.126953125, "learning_rate": 0.002381594678528325, "loss": 1.2067, "step": 4305 }, { "epoch": 0.3776007804567919, "grad_norm": 0.08349609375, "learning_rate": 0.0023812520147150048, "loss": 1.1974, "step": 4306 }, { "epoch": 0.37768847223116647, "grad_norm": 0.11181640625, "learning_rate": 0.0023809092842133886, "loss": 1.2032, "step": 4307 }, { "epoch": 0.37777616400554104, "grad_norm": 0.1064453125, "learning_rate": 0.0023805664870547322, "loss": 1.2481, "step": 4308 }, { "epoch": 0.37786385577991555, "grad_norm": 0.07861328125, "learning_rate": 0.0023802236232702985, "loss": 1.2823, "step": 4309 }, { "epoch": 0.3779515475542901, "grad_norm": 0.080078125, "learning_rate": 0.0023798806928913554, "loss": 1.228, "step": 4310 }, { "epoch": 0.3780392393286647, "grad_norm": 0.06494140625, "learning_rate": 0.002379537695949177, "loss": 1.2439, "step": 4311 }, { "epoch": 0.3781269311030392, "grad_norm": 0.0771484375, "learning_rate": 0.002379194632475044, "loss": 1.2754, "step": 4312 }, { "epoch": 0.37821462287741375, "grad_norm": 0.109375, "learning_rate": 0.002378851502500243, "loss": 1.2953, "step": 4313 }, { "epoch": 0.3783023146517883, "grad_norm": 0.10693359375, "learning_rate": 0.0023785083060560664, "loss": 1.2274, "step": 4314 }, { "epoch": 0.37839000642616283, "grad_norm": 0.07470703125, "learning_rate": 0.002378165043173813, "loss": 1.2105, "step": 4315 }, { "epoch": 0.3784776982005374, "grad_norm": 0.1064453125, "learning_rate": 0.0023778217138847877, "loss": 1.2152, "step": 4316 }, { "epoch": 0.37856538997491196, "grad_norm": 0.09228515625, "learning_rate": 0.002377478318220301, "loss": 1.2334, "step": 4317 }, { "epoch": 0.37865308174928647, "grad_norm": 0.11962890625, "learning_rate": 0.0023771348562116696, "loss": 1.2716, "step": 4318 }, { "epoch": 0.37874077352366103, "grad_norm": 0.1064453125, "learning_rate": 0.0023767913278902163, "loss": 1.2051, "step": 4319 }, { "epoch": 0.37882846529803554, "grad_norm": 0.099609375, "learning_rate": 0.0023764477332872697, "loss": 1.2133, "step": 4320 }, { "epoch": 0.3789161570724101, "grad_norm": 0.1083984375, "learning_rate": 0.002376104072434165, "loss": 1.2233, "step": 4321 }, { "epoch": 0.3790038488467847, "grad_norm": 0.060302734375, "learning_rate": 0.002375760345362244, "loss": 1.2359, "step": 4322 }, { "epoch": 0.3790915406211592, "grad_norm": 0.1962890625, "learning_rate": 0.0023754165521028526, "loss": 1.2538, "step": 4323 }, { "epoch": 0.37917923239553375, "grad_norm": 0.07763671875, "learning_rate": 0.0023750726926873443, "loss": 1.1927, "step": 4324 }, { "epoch": 0.3792669241699083, "grad_norm": 0.1875, "learning_rate": 0.002374728767147079, "loss": 1.2265, "step": 4325 }, { "epoch": 0.3793546159442828, "grad_norm": 0.08447265625, "learning_rate": 0.00237438477551342, "loss": 1.2759, "step": 4326 }, { "epoch": 0.3794423077186574, "grad_norm": 0.2158203125, "learning_rate": 0.0023740407178177406, "loss": 1.2054, "step": 4327 }, { "epoch": 0.37952999949303196, "grad_norm": 0.140625, "learning_rate": 0.0023736965940914156, "loss": 1.2636, "step": 4328 }, { "epoch": 0.37961769126740647, "grad_norm": 0.2119140625, "learning_rate": 0.00237335240436583, "loss": 1.2604, "step": 4329 }, { "epoch": 0.37970538304178103, "grad_norm": 0.1357421875, "learning_rate": 0.0023730081486723724, "loss": 1.2587, "step": 4330 }, { "epoch": 0.37979307481615554, "grad_norm": 0.1103515625, "learning_rate": 0.002372663827042438, "loss": 1.2913, "step": 4331 }, { "epoch": 0.3798807665905301, "grad_norm": 0.1728515625, "learning_rate": 0.002372319439507428, "loss": 1.2479, "step": 4332 }, { "epoch": 0.37996845836490467, "grad_norm": 0.09423828125, "learning_rate": 0.00237197498609875, "loss": 1.231, "step": 4333 }, { "epoch": 0.3800561501392792, "grad_norm": 0.12060546875, "learning_rate": 0.0023716304668478174, "loss": 1.1995, "step": 4334 }, { "epoch": 0.38014384191365375, "grad_norm": 0.107421875, "learning_rate": 0.0023712858817860486, "loss": 1.2653, "step": 4335 }, { "epoch": 0.3802315336880283, "grad_norm": 0.076171875, "learning_rate": 0.0023709412309448692, "loss": 1.2532, "step": 4336 }, { "epoch": 0.3803192254624028, "grad_norm": 0.11083984375, "learning_rate": 0.0023705965143557112, "loss": 1.2158, "step": 4337 }, { "epoch": 0.3804069172367774, "grad_norm": 0.1044921875, "learning_rate": 0.002370251732050011, "loss": 1.2872, "step": 4338 }, { "epoch": 0.38049460901115195, "grad_norm": 0.126953125, "learning_rate": 0.0023699068840592123, "loss": 1.2285, "step": 4339 }, { "epoch": 0.38058230078552646, "grad_norm": 0.1435546875, "learning_rate": 0.002369561970414764, "loss": 1.2282, "step": 4340 }, { "epoch": 0.380669992559901, "grad_norm": 0.059814453125, "learning_rate": 0.0023692169911481214, "loss": 1.2569, "step": 4341 }, { "epoch": 0.38075768433427554, "grad_norm": 0.140625, "learning_rate": 0.002368871946290746, "loss": 1.179, "step": 4342 }, { "epoch": 0.3808453761086501, "grad_norm": 0.10888671875, "learning_rate": 0.002368526835874105, "loss": 1.2525, "step": 4343 }, { "epoch": 0.38093306788302467, "grad_norm": 0.072265625, "learning_rate": 0.0023681816599296717, "loss": 1.262, "step": 4344 }, { "epoch": 0.3810207596573992, "grad_norm": 0.16796875, "learning_rate": 0.0023678364184889244, "loss": 1.2388, "step": 4345 }, { "epoch": 0.38110845143177374, "grad_norm": 0.06005859375, "learning_rate": 0.0023674911115833487, "loss": 1.2195, "step": 4346 }, { "epoch": 0.3811961432061483, "grad_norm": 0.1767578125, "learning_rate": 0.0023671457392444363, "loss": 1.2004, "step": 4347 }, { "epoch": 0.3812838349805228, "grad_norm": 0.07373046875, "learning_rate": 0.002366800301503684, "loss": 1.2, "step": 4348 }, { "epoch": 0.3813715267548974, "grad_norm": 0.1015625, "learning_rate": 0.0023664547983925943, "loss": 1.224, "step": 4349 }, { "epoch": 0.38145921852927195, "grad_norm": 0.1416015625, "learning_rate": 0.0023661092299426764, "loss": 1.2385, "step": 4350 }, { "epoch": 0.38154691030364646, "grad_norm": 0.0625, "learning_rate": 0.0023657635961854455, "loss": 1.2139, "step": 4351 }, { "epoch": 0.381634602078021, "grad_norm": 0.126953125, "learning_rate": 0.0023654178971524225, "loss": 1.1932, "step": 4352 }, { "epoch": 0.3817222938523956, "grad_norm": 0.08935546875, "learning_rate": 0.002365072132875134, "loss": 1.2314, "step": 4353 }, { "epoch": 0.3818099856267701, "grad_norm": 0.083984375, "learning_rate": 0.0023647263033851128, "loss": 1.2109, "step": 4354 }, { "epoch": 0.38189767740114466, "grad_norm": 0.095703125, "learning_rate": 0.0023643804087138975, "loss": 1.2568, "step": 4355 }, { "epoch": 0.3819853691755192, "grad_norm": 0.059326171875, "learning_rate": 0.0023640344488930335, "loss": 1.1851, "step": 4356 }, { "epoch": 0.38207306094989374, "grad_norm": 0.0634765625, "learning_rate": 0.002363688423954071, "loss": 1.2345, "step": 4357 }, { "epoch": 0.3821607527242683, "grad_norm": 0.0908203125, "learning_rate": 0.0023633423339285667, "loss": 1.2647, "step": 4358 }, { "epoch": 0.3822484444986428, "grad_norm": 0.064453125, "learning_rate": 0.0023629961788480826, "loss": 1.2419, "step": 4359 }, { "epoch": 0.3823361362730174, "grad_norm": 0.09375, "learning_rate": 0.0023626499587441883, "loss": 1.2377, "step": 4360 }, { "epoch": 0.38242382804739194, "grad_norm": 0.059326171875, "learning_rate": 0.002362303673648457, "loss": 1.2078, "step": 4361 }, { "epoch": 0.38251151982176645, "grad_norm": 0.06689453125, "learning_rate": 0.0023619573235924697, "loss": 1.2894, "step": 4362 }, { "epoch": 0.382599211596141, "grad_norm": 0.099609375, "learning_rate": 0.0023616109086078124, "loss": 1.1803, "step": 4363 }, { "epoch": 0.3826869033705156, "grad_norm": 0.058349609375, "learning_rate": 0.0023612644287260764, "loss": 1.2311, "step": 4364 }, { "epoch": 0.3827745951448901, "grad_norm": 0.0869140625, "learning_rate": 0.002360917883978861, "loss": 1.2578, "step": 4365 }, { "epoch": 0.38286228691926466, "grad_norm": 0.0634765625, "learning_rate": 0.0023605712743977702, "loss": 1.1635, "step": 4366 }, { "epoch": 0.38294997869363917, "grad_norm": 0.091796875, "learning_rate": 0.0023602246000144133, "loss": 1.3389, "step": 4367 }, { "epoch": 0.38303767046801374, "grad_norm": 0.130859375, "learning_rate": 0.0023598778608604066, "loss": 1.282, "step": 4368 }, { "epoch": 0.3831253622423883, "grad_norm": 0.07958984375, "learning_rate": 0.002359531056967372, "loss": 1.2124, "step": 4369 }, { "epoch": 0.3832130540167628, "grad_norm": 0.1181640625, "learning_rate": 0.0023591841883669356, "loss": 1.1968, "step": 4370 }, { "epoch": 0.3833007457911374, "grad_norm": 0.09375, "learning_rate": 0.0023588372550907323, "loss": 1.2372, "step": 4371 }, { "epoch": 0.38338843756551194, "grad_norm": 0.0986328125, "learning_rate": 0.0023584902571704013, "loss": 1.2279, "step": 4372 }, { "epoch": 0.38347612933988645, "grad_norm": 0.11376953125, "learning_rate": 0.0023581431946375873, "loss": 1.2229, "step": 4373 }, { "epoch": 0.383563821114261, "grad_norm": 0.08203125, "learning_rate": 0.002357796067523943, "loss": 1.2293, "step": 4374 }, { "epoch": 0.3836515128886356, "grad_norm": 0.1015625, "learning_rate": 0.002357448875861124, "loss": 1.217, "step": 4375 }, { "epoch": 0.3837392046630101, "grad_norm": 0.057861328125, "learning_rate": 0.0023571016196807936, "loss": 1.2162, "step": 4376 }, { "epoch": 0.38382689643738466, "grad_norm": 0.1083984375, "learning_rate": 0.0023567542990146208, "loss": 1.2112, "step": 4377 }, { "epoch": 0.38391458821175917, "grad_norm": 0.05419921875, "learning_rate": 0.0023564069138942807, "loss": 1.2131, "step": 4378 }, { "epoch": 0.38400227998613373, "grad_norm": 0.10986328125, "learning_rate": 0.0023560594643514537, "loss": 1.2752, "step": 4379 }, { "epoch": 0.3840899717605083, "grad_norm": 0.07666015625, "learning_rate": 0.002355711950417826, "loss": 1.228, "step": 4380 }, { "epoch": 0.3841776635348828, "grad_norm": 0.087890625, "learning_rate": 0.0023553643721250903, "loss": 1.2293, "step": 4381 }, { "epoch": 0.3842653553092574, "grad_norm": 0.076171875, "learning_rate": 0.002355016729504945, "loss": 1.1762, "step": 4382 }, { "epoch": 0.38435304708363194, "grad_norm": 0.0673828125, "learning_rate": 0.0023546690225890936, "loss": 1.139, "step": 4383 }, { "epoch": 0.38444073885800645, "grad_norm": 0.07763671875, "learning_rate": 0.0023543212514092467, "loss": 1.1875, "step": 4384 }, { "epoch": 0.384528430632381, "grad_norm": 0.08056640625, "learning_rate": 0.0023539734159971197, "loss": 1.1852, "step": 4385 }, { "epoch": 0.3846161224067556, "grad_norm": 0.1181640625, "learning_rate": 0.002353625516384434, "loss": 1.2606, "step": 4386 }, { "epoch": 0.3847038141811301, "grad_norm": 0.12353515625, "learning_rate": 0.0023532775526029183, "loss": 1.2606, "step": 4387 }, { "epoch": 0.38479150595550465, "grad_norm": 0.1474609375, "learning_rate": 0.0023529295246843052, "loss": 1.2572, "step": 4388 }, { "epoch": 0.38487919772987916, "grad_norm": 0.1337890625, "learning_rate": 0.002352581432660334, "loss": 1.2626, "step": 4389 }, { "epoch": 0.38496688950425373, "grad_norm": 0.09375, "learning_rate": 0.0023522332765627493, "loss": 1.2228, "step": 4390 }, { "epoch": 0.3850545812786283, "grad_norm": 0.1005859375, "learning_rate": 0.0023518850564233033, "loss": 1.1929, "step": 4391 }, { "epoch": 0.3851422730530028, "grad_norm": 0.06591796875, "learning_rate": 0.0023515367722737514, "loss": 1.2288, "step": 4392 }, { "epoch": 0.38522996482737737, "grad_norm": 0.09521484375, "learning_rate": 0.0023511884241458575, "loss": 1.2818, "step": 4393 }, { "epoch": 0.38531765660175193, "grad_norm": 0.09521484375, "learning_rate": 0.0023508400120713898, "loss": 1.248, "step": 4394 }, { "epoch": 0.38540534837612644, "grad_norm": 0.115234375, "learning_rate": 0.0023504915360821215, "loss": 1.2718, "step": 4395 }, { "epoch": 0.385493040150501, "grad_norm": 0.1005859375, "learning_rate": 0.0023501429962098333, "loss": 1.2789, "step": 4396 }, { "epoch": 0.3855807319248756, "grad_norm": 0.08203125, "learning_rate": 0.0023497943924863118, "loss": 1.1998, "step": 4397 }, { "epoch": 0.3856684236992501, "grad_norm": 0.10205078125, "learning_rate": 0.002349445724943348, "loss": 1.2014, "step": 4398 }, { "epoch": 0.38575611547362465, "grad_norm": 0.06884765625, "learning_rate": 0.002349096993612739, "loss": 1.2845, "step": 4399 }, { "epoch": 0.3858438072479992, "grad_norm": 0.1015625, "learning_rate": 0.00234874819852629, "loss": 1.2932, "step": 4400 }, { "epoch": 0.3859314990223737, "grad_norm": 0.08349609375, "learning_rate": 0.0023483993397158086, "loss": 1.1951, "step": 4401 }, { "epoch": 0.3860191907967483, "grad_norm": 0.0791015625, "learning_rate": 0.002348050417213111, "loss": 1.2739, "step": 4402 }, { "epoch": 0.3861068825711228, "grad_norm": 0.0771484375, "learning_rate": 0.002347701431050016, "loss": 1.2163, "step": 4403 }, { "epoch": 0.38619457434549737, "grad_norm": 0.08154296875, "learning_rate": 0.0023473523812583526, "loss": 1.2315, "step": 4404 }, { "epoch": 0.38628226611987193, "grad_norm": 0.076171875, "learning_rate": 0.0023470032678699518, "loss": 1.2004, "step": 4405 }, { "epoch": 0.38636995789424644, "grad_norm": 0.1171875, "learning_rate": 0.0023466540909166523, "loss": 1.2047, "step": 4406 }, { "epoch": 0.386457649668621, "grad_norm": 0.06884765625, "learning_rate": 0.0023463048504302982, "loss": 1.2462, "step": 4407 }, { "epoch": 0.38654534144299557, "grad_norm": 0.09912109375, "learning_rate": 0.002345955546442739, "loss": 1.2903, "step": 4408 }, { "epoch": 0.3866330332173701, "grad_norm": 0.06591796875, "learning_rate": 0.002345606178985831, "loss": 1.2617, "step": 4409 }, { "epoch": 0.38672072499174465, "grad_norm": 0.10302734375, "learning_rate": 0.002345256748091435, "loss": 1.1593, "step": 4410 }, { "epoch": 0.3868084167661192, "grad_norm": 0.10546875, "learning_rate": 0.0023449072537914182, "loss": 1.2615, "step": 4411 }, { "epoch": 0.3868961085404937, "grad_norm": 0.126953125, "learning_rate": 0.002344557696117654, "loss": 1.2014, "step": 4412 }, { "epoch": 0.3869838003148683, "grad_norm": 0.1162109375, "learning_rate": 0.002344208075102021, "loss": 1.2566, "step": 4413 }, { "epoch": 0.3870714920892428, "grad_norm": 0.0927734375, "learning_rate": 0.0023438583907764033, "loss": 1.2909, "step": 4414 }, { "epoch": 0.38715918386361736, "grad_norm": 0.11767578125, "learning_rate": 0.002343508643172692, "loss": 1.2334, "step": 4415 }, { "epoch": 0.3872468756379919, "grad_norm": 0.060546875, "learning_rate": 0.0023431588323227824, "loss": 1.246, "step": 4416 }, { "epoch": 0.38733456741236644, "grad_norm": 0.177734375, "learning_rate": 0.0023428089582585767, "loss": 1.2908, "step": 4417 }, { "epoch": 0.387422259186741, "grad_norm": 0.05908203125, "learning_rate": 0.002342459021011983, "loss": 1.2212, "step": 4418 }, { "epoch": 0.38750995096111557, "grad_norm": 0.1826171875, "learning_rate": 0.0023421090206149128, "loss": 1.2277, "step": 4419 }, { "epoch": 0.3875976427354901, "grad_norm": 0.0625, "learning_rate": 0.002341758957099288, "loss": 1.2125, "step": 4420 }, { "epoch": 0.38768533450986464, "grad_norm": 0.12060546875, "learning_rate": 0.002341408830497031, "loss": 1.2614, "step": 4421 }, { "epoch": 0.3877730262842392, "grad_norm": 0.0654296875, "learning_rate": 0.0023410586408400746, "loss": 1.2428, "step": 4422 }, { "epoch": 0.3878607180586137, "grad_norm": 0.0703125, "learning_rate": 0.002340708388160353, "loss": 1.2917, "step": 4423 }, { "epoch": 0.3879484098329883, "grad_norm": 0.06982421875, "learning_rate": 0.0023403580724898104, "loss": 1.2334, "step": 4424 }, { "epoch": 0.3880361016073628, "grad_norm": 0.061767578125, "learning_rate": 0.002340007693860393, "loss": 1.1852, "step": 4425 }, { "epoch": 0.38812379338173736, "grad_norm": 0.07275390625, "learning_rate": 0.002339657252304056, "loss": 1.2402, "step": 4426 }, { "epoch": 0.3882114851561119, "grad_norm": 0.05224609375, "learning_rate": 0.002339306747852757, "loss": 1.2311, "step": 4427 }, { "epoch": 0.38829917693048643, "grad_norm": 0.06494140625, "learning_rate": 0.0023389561805384628, "loss": 1.2576, "step": 4428 }, { "epoch": 0.388386868704861, "grad_norm": 0.0693359375, "learning_rate": 0.002338605550393143, "loss": 1.2228, "step": 4429 }, { "epoch": 0.38847456047923556, "grad_norm": 0.09423828125, "learning_rate": 0.002338254857448775, "loss": 1.2761, "step": 4430 }, { "epoch": 0.3885622522536101, "grad_norm": 0.1318359375, "learning_rate": 0.00233790410173734, "loss": 1.2512, "step": 4431 }, { "epoch": 0.38864994402798464, "grad_norm": 0.07763671875, "learning_rate": 0.0023375532832908275, "loss": 1.2521, "step": 4432 }, { "epoch": 0.3887376358023592, "grad_norm": 0.0654296875, "learning_rate": 0.00233720240214123, "loss": 1.1788, "step": 4433 }, { "epoch": 0.3888253275767337, "grad_norm": 0.08251953125, "learning_rate": 0.0023368514583205478, "loss": 1.2901, "step": 4434 }, { "epoch": 0.3889130193511083, "grad_norm": 0.080078125, "learning_rate": 0.0023365004518607854, "loss": 1.2274, "step": 4435 }, { "epoch": 0.38900071112548285, "grad_norm": 0.08984375, "learning_rate": 0.002336149382793954, "loss": 1.2155, "step": 4436 }, { "epoch": 0.38908840289985736, "grad_norm": 0.11328125, "learning_rate": 0.0023357982511520705, "loss": 1.2477, "step": 4437 }, { "epoch": 0.3891760946742319, "grad_norm": 0.1396484375, "learning_rate": 0.0023354470569671567, "loss": 1.2999, "step": 4438 }, { "epoch": 0.38926378644860643, "grad_norm": 0.06591796875, "learning_rate": 0.0023350958002712405, "loss": 1.2413, "step": 4439 }, { "epoch": 0.389351478222981, "grad_norm": 0.1123046875, "learning_rate": 0.0023347444810963565, "loss": 1.2025, "step": 4440 }, { "epoch": 0.38943916999735556, "grad_norm": 0.06201171875, "learning_rate": 0.002334393099474543, "loss": 1.2063, "step": 4441 }, { "epoch": 0.38952686177173007, "grad_norm": 0.0673828125, "learning_rate": 0.0023340416554378457, "loss": 1.2347, "step": 4442 }, { "epoch": 0.38961455354610464, "grad_norm": 0.07421875, "learning_rate": 0.0023336901490183153, "loss": 1.2625, "step": 4443 }, { "epoch": 0.3897022453204792, "grad_norm": 0.095703125, "learning_rate": 0.0023333385802480084, "loss": 1.2614, "step": 4444 }, { "epoch": 0.3897899370948537, "grad_norm": 0.06884765625, "learning_rate": 0.0023329869491589862, "loss": 1.19, "step": 4445 }, { "epoch": 0.3898776288692283, "grad_norm": 0.08056640625, "learning_rate": 0.0023326352557833186, "loss": 1.2051, "step": 4446 }, { "epoch": 0.38996532064360284, "grad_norm": 0.08056640625, "learning_rate": 0.002332283500153077, "loss": 1.1939, "step": 4447 }, { "epoch": 0.39005301241797735, "grad_norm": 0.08642578125, "learning_rate": 0.0023319316823003414, "loss": 1.2345, "step": 4448 }, { "epoch": 0.3901407041923519, "grad_norm": 0.0625, "learning_rate": 0.0023315798022571976, "loss": 1.286, "step": 4449 }, { "epoch": 0.3902283959667264, "grad_norm": 0.06884765625, "learning_rate": 0.0023312278600557345, "loss": 1.1704, "step": 4450 }, { "epoch": 0.390316087741101, "grad_norm": 0.0517578125, "learning_rate": 0.0023308758557280497, "loss": 1.1723, "step": 4451 }, { "epoch": 0.39040377951547556, "grad_norm": 0.060302734375, "learning_rate": 0.002330523789306244, "loss": 1.221, "step": 4452 }, { "epoch": 0.39049147128985007, "grad_norm": 0.052734375, "learning_rate": 0.002330171660822426, "loss": 1.2053, "step": 4453 }, { "epoch": 0.39057916306422463, "grad_norm": 0.08251953125, "learning_rate": 0.0023298194703087085, "loss": 1.232, "step": 4454 }, { "epoch": 0.3906668548385992, "grad_norm": 0.059326171875, "learning_rate": 0.00232946721779721, "loss": 1.2439, "step": 4455 }, { "epoch": 0.3907545466129737, "grad_norm": 0.0595703125, "learning_rate": 0.0023291149033200552, "loss": 1.2209, "step": 4456 }, { "epoch": 0.3908422383873483, "grad_norm": 0.0595703125, "learning_rate": 0.002328762526909374, "loss": 1.226, "step": 4457 }, { "epoch": 0.39092993016172284, "grad_norm": 0.072265625, "learning_rate": 0.002328410088597303, "loss": 1.272, "step": 4458 }, { "epoch": 0.39101762193609735, "grad_norm": 0.05908203125, "learning_rate": 0.002328057588415983, "loss": 1.215, "step": 4459 }, { "epoch": 0.3911053137104719, "grad_norm": 0.06005859375, "learning_rate": 0.0023277050263975614, "loss": 1.253, "step": 4460 }, { "epoch": 0.3911930054848464, "grad_norm": 0.05810546875, "learning_rate": 0.002327352402574191, "loss": 1.1789, "step": 4461 }, { "epoch": 0.391280697259221, "grad_norm": 0.0771484375, "learning_rate": 0.0023269997169780305, "loss": 1.2507, "step": 4462 }, { "epoch": 0.39136838903359555, "grad_norm": 0.05517578125, "learning_rate": 0.0023266469696412427, "loss": 1.2443, "step": 4463 }, { "epoch": 0.39145608080797006, "grad_norm": 0.09375, "learning_rate": 0.0023262941605959983, "loss": 1.2051, "step": 4464 }, { "epoch": 0.39154377258234463, "grad_norm": 0.064453125, "learning_rate": 0.0023259412898744723, "loss": 1.2027, "step": 4465 }, { "epoch": 0.3916314643567192, "grad_norm": 0.134765625, "learning_rate": 0.002325588357508846, "loss": 1.2597, "step": 4466 }, { "epoch": 0.3917191561310937, "grad_norm": 0.07177734375, "learning_rate": 0.0023252353635313046, "loss": 1.3462, "step": 4467 }, { "epoch": 0.39180684790546827, "grad_norm": 0.062255859375, "learning_rate": 0.002324882307974042, "loss": 1.2045, "step": 4468 }, { "epoch": 0.39189453967984283, "grad_norm": 0.057861328125, "learning_rate": 0.002324529190869254, "loss": 1.1951, "step": 4469 }, { "epoch": 0.39198223145421734, "grad_norm": 0.0673828125, "learning_rate": 0.0023241760122491464, "loss": 1.2359, "step": 4470 }, { "epoch": 0.3920699232285919, "grad_norm": 0.055908203125, "learning_rate": 0.0023238227721459268, "loss": 1.219, "step": 4471 }, { "epoch": 0.3921576150029665, "grad_norm": 0.06396484375, "learning_rate": 0.002323469470591809, "loss": 1.2034, "step": 4472 }, { "epoch": 0.392245306777341, "grad_norm": 0.0927734375, "learning_rate": 0.0023231161076190145, "loss": 1.2535, "step": 4473 }, { "epoch": 0.39233299855171555, "grad_norm": 0.083984375, "learning_rate": 0.0023227626832597686, "loss": 1.2449, "step": 4474 }, { "epoch": 0.39242069032609006, "grad_norm": 0.07666015625, "learning_rate": 0.0023224091975463028, "loss": 1.2531, "step": 4475 }, { "epoch": 0.3925083821004646, "grad_norm": 0.1142578125, "learning_rate": 0.0023220556505108535, "loss": 1.2673, "step": 4476 }, { "epoch": 0.3925960738748392, "grad_norm": 0.1513671875, "learning_rate": 0.002321702042185664, "loss": 1.22, "step": 4477 }, { "epoch": 0.3926837656492137, "grad_norm": 0.10498046875, "learning_rate": 0.0023213483726029827, "loss": 1.2147, "step": 4478 }, { "epoch": 0.39277145742358827, "grad_norm": 0.08544921875, "learning_rate": 0.0023209946417950627, "loss": 1.2293, "step": 4479 }, { "epoch": 0.39285914919796283, "grad_norm": 0.1376953125, "learning_rate": 0.002320640849794163, "loss": 1.1619, "step": 4480 }, { "epoch": 0.39294684097233734, "grad_norm": 0.057373046875, "learning_rate": 0.0023202869966325495, "loss": 1.2594, "step": 4481 }, { "epoch": 0.3930345327467119, "grad_norm": 0.1279296875, "learning_rate": 0.002319933082342492, "loss": 1.2428, "step": 4482 }, { "epoch": 0.39312222452108647, "grad_norm": 0.10205078125, "learning_rate": 0.002319579106956267, "loss": 1.3231, "step": 4483 }, { "epoch": 0.393209916295461, "grad_norm": 0.1455078125, "learning_rate": 0.0023192250705061563, "loss": 1.2396, "step": 4484 }, { "epoch": 0.39329760806983555, "grad_norm": 0.10791015625, "learning_rate": 0.0023188709730244464, "loss": 1.201, "step": 4485 }, { "epoch": 0.39338529984421006, "grad_norm": 0.1181640625, "learning_rate": 0.0023185168145434306, "loss": 1.2998, "step": 4486 }, { "epoch": 0.3934729916185846, "grad_norm": 0.08642578125, "learning_rate": 0.0023181625950954073, "loss": 1.2515, "step": 4487 }, { "epoch": 0.3935606833929592, "grad_norm": 0.052001953125, "learning_rate": 0.0023178083147126793, "loss": 1.269, "step": 4488 }, { "epoch": 0.3936483751673337, "grad_norm": 0.1240234375, "learning_rate": 0.0023174539734275574, "loss": 1.2343, "step": 4489 }, { "epoch": 0.39373606694170826, "grad_norm": 0.060791015625, "learning_rate": 0.0023170995712723567, "loss": 1.2354, "step": 4490 }, { "epoch": 0.39382375871608283, "grad_norm": 0.1025390625, "learning_rate": 0.0023167451082793966, "loss": 1.279, "step": 4491 }, { "epoch": 0.39391145049045734, "grad_norm": 0.064453125, "learning_rate": 0.0023163905844810037, "loss": 1.2576, "step": 4492 }, { "epoch": 0.3939991422648319, "grad_norm": 0.06689453125, "learning_rate": 0.0023160359999095104, "loss": 1.2211, "step": 4493 }, { "epoch": 0.39408683403920647, "grad_norm": 0.06884765625, "learning_rate": 0.0023156813545972526, "loss": 1.1893, "step": 4494 }, { "epoch": 0.394174525813581, "grad_norm": 0.07763671875, "learning_rate": 0.0023153266485765743, "loss": 1.3233, "step": 4495 }, { "epoch": 0.39426221758795554, "grad_norm": 0.107421875, "learning_rate": 0.0023149718818798225, "loss": 1.2163, "step": 4496 }, { "epoch": 0.39434990936233005, "grad_norm": 0.0556640625, "learning_rate": 0.0023146170545393524, "loss": 1.2233, "step": 4497 }, { "epoch": 0.3944376011367046, "grad_norm": 0.076171875, "learning_rate": 0.002314262166587522, "loss": 1.2079, "step": 4498 }, { "epoch": 0.3945252929110792, "grad_norm": 0.091796875, "learning_rate": 0.0023139072180566973, "loss": 1.2675, "step": 4499 }, { "epoch": 0.3946129846854537, "grad_norm": 0.07177734375, "learning_rate": 0.002313552208979248, "loss": 1.2258, "step": 4500 }, { "epoch": 0.3946129846854537, "eval_loss": 1.2392518520355225, "eval_runtime": 428.8799, "eval_samples_per_second": 33.685, "eval_steps_per_second": 8.422, "step": 4500 }, { "epoch": 0.39470067645982826, "grad_norm": 0.1015625, "learning_rate": 0.00231319713938755, "loss": 1.2099, "step": 4501 }, { "epoch": 0.3947883682342028, "grad_norm": 0.07080078125, "learning_rate": 0.0023128420093139848, "loss": 1.2018, "step": 4502 }, { "epoch": 0.39487606000857733, "grad_norm": 0.09130859375, "learning_rate": 0.0023124868187909403, "loss": 1.1938, "step": 4503 }, { "epoch": 0.3949637517829519, "grad_norm": 0.09765625, "learning_rate": 0.0023121315678508074, "loss": 1.1941, "step": 4504 }, { "epoch": 0.39505144355732646, "grad_norm": 0.080078125, "learning_rate": 0.002311776256525985, "loss": 1.2007, "step": 4505 }, { "epoch": 0.395139135331701, "grad_norm": 0.09423828125, "learning_rate": 0.0023114208848488763, "loss": 1.2255, "step": 4506 }, { "epoch": 0.39522682710607554, "grad_norm": 0.09814453125, "learning_rate": 0.0023110654528518903, "loss": 1.2484, "step": 4507 }, { "epoch": 0.3953145188804501, "grad_norm": 0.07763671875, "learning_rate": 0.0023107099605674418, "loss": 1.2778, "step": 4508 }, { "epoch": 0.3954022106548246, "grad_norm": 0.078125, "learning_rate": 0.00231035440802795, "loss": 1.2409, "step": 4509 }, { "epoch": 0.3954899024291992, "grad_norm": 0.068359375, "learning_rate": 0.002309998795265841, "loss": 1.2708, "step": 4510 }, { "epoch": 0.3955775942035737, "grad_norm": 0.08935546875, "learning_rate": 0.002309643122313546, "loss": 1.3125, "step": 4511 }, { "epoch": 0.39566528597794826, "grad_norm": 0.08203125, "learning_rate": 0.002309287389203501, "loss": 1.2391, "step": 4512 }, { "epoch": 0.3957529777523228, "grad_norm": 0.0654296875, "learning_rate": 0.0023089315959681483, "loss": 1.2689, "step": 4513 }, { "epoch": 0.39584066952669733, "grad_norm": 0.10009765625, "learning_rate": 0.0023085757426399346, "loss": 1.2491, "step": 4514 }, { "epoch": 0.3959283613010719, "grad_norm": 0.05615234375, "learning_rate": 0.0023082198292513137, "loss": 1.2026, "step": 4515 }, { "epoch": 0.39601605307544646, "grad_norm": 0.1484375, "learning_rate": 0.002307863855834743, "loss": 1.2414, "step": 4516 }, { "epoch": 0.39610374484982097, "grad_norm": 0.0576171875, "learning_rate": 0.0023075078224226876, "loss": 1.2214, "step": 4517 }, { "epoch": 0.39619143662419554, "grad_norm": 0.1298828125, "learning_rate": 0.0023071517290476156, "loss": 1.2097, "step": 4518 }, { "epoch": 0.3962791283985701, "grad_norm": 0.0595703125, "learning_rate": 0.0023067955757420026, "loss": 1.1662, "step": 4519 }, { "epoch": 0.3963668201729446, "grad_norm": 0.1337890625, "learning_rate": 0.0023064393625383288, "loss": 1.2372, "step": 4520 }, { "epoch": 0.3964545119473192, "grad_norm": 0.07666015625, "learning_rate": 0.0023060830894690793, "loss": 1.2135, "step": 4521 }, { "epoch": 0.3965422037216937, "grad_norm": 0.1279296875, "learning_rate": 0.002305726756566746, "loss": 1.1882, "step": 4522 }, { "epoch": 0.39662989549606825, "grad_norm": 0.083984375, "learning_rate": 0.0023053703638638246, "loss": 1.2553, "step": 4523 }, { "epoch": 0.3967175872704428, "grad_norm": 0.1025390625, "learning_rate": 0.0023050139113928187, "loss": 1.2191, "step": 4524 }, { "epoch": 0.3968052790448173, "grad_norm": 0.0791015625, "learning_rate": 0.0023046573991862347, "loss": 1.2148, "step": 4525 }, { "epoch": 0.3968929708191919, "grad_norm": 0.06005859375, "learning_rate": 0.002304300827276586, "loss": 1.2323, "step": 4526 }, { "epoch": 0.39698066259356646, "grad_norm": 0.059326171875, "learning_rate": 0.0023039441956963906, "loss": 1.2741, "step": 4527 }, { "epoch": 0.39706835436794097, "grad_norm": 0.056396484375, "learning_rate": 0.002303587504478173, "loss": 1.2397, "step": 4528 }, { "epoch": 0.39715604614231553, "grad_norm": 0.07421875, "learning_rate": 0.002303230753654462, "loss": 1.2474, "step": 4529 }, { "epoch": 0.3972437379166901, "grad_norm": 0.059814453125, "learning_rate": 0.0023028739432577926, "loss": 1.2199, "step": 4530 }, { "epoch": 0.3973314296910646, "grad_norm": 0.061279296875, "learning_rate": 0.002302517073320705, "loss": 1.1333, "step": 4531 }, { "epoch": 0.3974191214654392, "grad_norm": 0.06005859375, "learning_rate": 0.0023021601438757444, "loss": 1.2512, "step": 4532 }, { "epoch": 0.3975068132398137, "grad_norm": 0.064453125, "learning_rate": 0.002301803154955463, "loss": 1.2034, "step": 4533 }, { "epoch": 0.39759450501418825, "grad_norm": 0.053466796875, "learning_rate": 0.002301446106592416, "loss": 1.2332, "step": 4534 }, { "epoch": 0.3976821967885628, "grad_norm": 0.0517578125, "learning_rate": 0.0023010889988191656, "loss": 1.2396, "step": 4535 }, { "epoch": 0.3977698885629373, "grad_norm": 0.09326171875, "learning_rate": 0.0023007318316682797, "loss": 1.2215, "step": 4536 }, { "epoch": 0.3978575803373119, "grad_norm": 0.056640625, "learning_rate": 0.0023003746051723306, "loss": 1.2365, "step": 4537 }, { "epoch": 0.39794527211168645, "grad_norm": 0.08349609375, "learning_rate": 0.002300017319363896, "loss": 1.2355, "step": 4538 }, { "epoch": 0.39803296388606096, "grad_norm": 0.05810546875, "learning_rate": 0.0022996599742755607, "loss": 1.2324, "step": 4539 }, { "epoch": 0.39812065566043553, "grad_norm": 0.056884765625, "learning_rate": 0.002299302569939912, "loss": 1.2264, "step": 4540 }, { "epoch": 0.3982083474348101, "grad_norm": 0.07666015625, "learning_rate": 0.0022989451063895453, "loss": 1.2274, "step": 4541 }, { "epoch": 0.3982960392091846, "grad_norm": 0.10205078125, "learning_rate": 0.0022985875836570606, "loss": 1.1845, "step": 4542 }, { "epoch": 0.39838373098355917, "grad_norm": 0.058837890625, "learning_rate": 0.002298230001775062, "loss": 1.1684, "step": 4543 }, { "epoch": 0.39847142275793374, "grad_norm": 0.0908203125, "learning_rate": 0.002297872360776161, "loss": 1.2876, "step": 4544 }, { "epoch": 0.39855911453230825, "grad_norm": 0.078125, "learning_rate": 0.002297514660692973, "loss": 1.2507, "step": 4545 }, { "epoch": 0.3986468063066828, "grad_norm": 0.11279296875, "learning_rate": 0.0022971569015581197, "loss": 1.2119, "step": 4546 }, { "epoch": 0.3987344980810573, "grad_norm": 0.064453125, "learning_rate": 0.0022967990834042275, "loss": 1.2333, "step": 4547 }, { "epoch": 0.3988221898554319, "grad_norm": 0.056884765625, "learning_rate": 0.0022964412062639284, "loss": 1.2082, "step": 4548 }, { "epoch": 0.39890988162980645, "grad_norm": 0.06396484375, "learning_rate": 0.00229608327016986, "loss": 1.2479, "step": 4549 }, { "epoch": 0.39899757340418096, "grad_norm": 0.0927734375, "learning_rate": 0.0022957252751546657, "loss": 1.2099, "step": 4550 }, { "epoch": 0.3990852651785555, "grad_norm": 0.072265625, "learning_rate": 0.0022953672212509928, "loss": 1.2669, "step": 4551 }, { "epoch": 0.3991729569529301, "grad_norm": 0.05322265625, "learning_rate": 0.002295009108491496, "loss": 1.2147, "step": 4552 }, { "epoch": 0.3992606487273046, "grad_norm": 0.055908203125, "learning_rate": 0.0022946509369088327, "loss": 1.1513, "step": 4553 }, { "epoch": 0.39934834050167917, "grad_norm": 0.053466796875, "learning_rate": 0.0022942927065356684, "loss": 1.2532, "step": 4554 }, { "epoch": 0.39943603227605373, "grad_norm": 0.06591796875, "learning_rate": 0.002293934417404673, "loss": 1.1983, "step": 4555 }, { "epoch": 0.39952372405042824, "grad_norm": 0.07958984375, "learning_rate": 0.0022935760695485204, "loss": 1.214, "step": 4556 }, { "epoch": 0.3996114158248028, "grad_norm": 0.058837890625, "learning_rate": 0.002293217662999892, "loss": 1.2645, "step": 4557 }, { "epoch": 0.3996991075991773, "grad_norm": 0.06689453125, "learning_rate": 0.0022928591977914733, "loss": 1.249, "step": 4558 }, { "epoch": 0.3997867993735519, "grad_norm": 0.04833984375, "learning_rate": 0.002292500673955955, "loss": 1.183, "step": 4559 }, { "epoch": 0.39987449114792645, "grad_norm": 0.05224609375, "learning_rate": 0.0022921420915260338, "loss": 1.2224, "step": 4560 }, { "epoch": 0.39996218292230096, "grad_norm": 0.06640625, "learning_rate": 0.0022917834505344114, "loss": 1.159, "step": 4561 }, { "epoch": 0.4000498746966755, "grad_norm": 0.0576171875, "learning_rate": 0.0022914247510137952, "loss": 1.1701, "step": 4562 }, { "epoch": 0.4001375664710501, "grad_norm": 0.0517578125, "learning_rate": 0.002291065992996898, "loss": 1.2131, "step": 4563 }, { "epoch": 0.4002252582454246, "grad_norm": 0.09130859375, "learning_rate": 0.0022907071765164372, "loss": 1.2053, "step": 4564 }, { "epoch": 0.40031295001979916, "grad_norm": 0.06689453125, "learning_rate": 0.0022903483016051347, "loss": 1.2464, "step": 4565 }, { "epoch": 0.40040064179417373, "grad_norm": 0.078125, "learning_rate": 0.002289989368295721, "loss": 1.2049, "step": 4566 }, { "epoch": 0.40048833356854824, "grad_norm": 0.05322265625, "learning_rate": 0.002289630376620929, "loss": 1.2584, "step": 4567 }, { "epoch": 0.4005760253429228, "grad_norm": 0.0810546875, "learning_rate": 0.002289271326613498, "loss": 1.2215, "step": 4568 }, { "epoch": 0.4006637171172973, "grad_norm": 0.059814453125, "learning_rate": 0.002288912218306172, "loss": 1.237, "step": 4569 }, { "epoch": 0.4007514088916719, "grad_norm": 0.11474609375, "learning_rate": 0.002288553051731701, "loss": 1.2516, "step": 4570 }, { "epoch": 0.40083910066604644, "grad_norm": 0.1376953125, "learning_rate": 0.0022881938269228405, "loss": 1.2975, "step": 4571 }, { "epoch": 0.40092679244042095, "grad_norm": 0.07177734375, "learning_rate": 0.00228783454391235, "loss": 1.1631, "step": 4572 }, { "epoch": 0.4010144842147955, "grad_norm": 0.1123046875, "learning_rate": 0.002287475202732996, "loss": 1.2221, "step": 4573 }, { "epoch": 0.4011021759891701, "grad_norm": 0.06884765625, "learning_rate": 0.0022871158034175484, "loss": 1.2415, "step": 4574 }, { "epoch": 0.4011898677635446, "grad_norm": 0.177734375, "learning_rate": 0.0022867563459987856, "loss": 1.2546, "step": 4575 }, { "epoch": 0.40127755953791916, "grad_norm": 0.06689453125, "learning_rate": 0.0022863968305094867, "loss": 1.2308, "step": 4576 }, { "epoch": 0.4013652513122937, "grad_norm": 0.146484375, "learning_rate": 0.00228603725698244, "loss": 1.1921, "step": 4577 }, { "epoch": 0.40145294308666823, "grad_norm": 0.05517578125, "learning_rate": 0.0022856776254504374, "loss": 1.2055, "step": 4578 }, { "epoch": 0.4015406348610428, "grad_norm": 0.08544921875, "learning_rate": 0.0022853179359462765, "loss": 1.2448, "step": 4579 }, { "epoch": 0.4016283266354173, "grad_norm": 0.06298828125, "learning_rate": 0.00228495818850276, "loss": 1.2259, "step": 4580 }, { "epoch": 0.4017160184097919, "grad_norm": 0.07421875, "learning_rate": 0.0022845983831526954, "loss": 1.2609, "step": 4581 }, { "epoch": 0.40180371018416644, "grad_norm": 0.07470703125, "learning_rate": 0.002284238519928897, "loss": 1.1922, "step": 4582 }, { "epoch": 0.40189140195854095, "grad_norm": 0.068359375, "learning_rate": 0.0022838785988641822, "loss": 1.2356, "step": 4583 }, { "epoch": 0.4019790937329155, "grad_norm": 0.064453125, "learning_rate": 0.002283518619991376, "loss": 1.2284, "step": 4584 }, { "epoch": 0.4020667855072901, "grad_norm": 0.0693359375, "learning_rate": 0.002283158583343307, "loss": 1.2899, "step": 4585 }, { "epoch": 0.4021544772816646, "grad_norm": 0.064453125, "learning_rate": 0.0022827984889528096, "loss": 1.2525, "step": 4586 }, { "epoch": 0.40224216905603916, "grad_norm": 0.06640625, "learning_rate": 0.002282438336852723, "loss": 1.2002, "step": 4587 }, { "epoch": 0.4023298608304137, "grad_norm": 0.07958984375, "learning_rate": 0.0022820781270758944, "loss": 1.2552, "step": 4588 }, { "epoch": 0.40241755260478823, "grad_norm": 0.0517578125, "learning_rate": 0.0022817178596551708, "loss": 1.2702, "step": 4589 }, { "epoch": 0.4025052443791628, "grad_norm": 0.0830078125, "learning_rate": 0.0022813575346234097, "loss": 1.1903, "step": 4590 }, { "epoch": 0.40259293615353736, "grad_norm": 0.062255859375, "learning_rate": 0.002280997152013471, "loss": 1.2588, "step": 4591 }, { "epoch": 0.40268062792791187, "grad_norm": 0.07958984375, "learning_rate": 0.0022806367118582216, "loss": 1.2141, "step": 4592 }, { "epoch": 0.40276831970228644, "grad_norm": 0.05517578125, "learning_rate": 0.002280276214190531, "loss": 1.2417, "step": 4593 }, { "epoch": 0.40285601147666095, "grad_norm": 0.08447265625, "learning_rate": 0.0022799156590432774, "loss": 1.1861, "step": 4594 }, { "epoch": 0.4029437032510355, "grad_norm": 0.057861328125, "learning_rate": 0.0022795550464493417, "loss": 1.1941, "step": 4595 }, { "epoch": 0.4030313950254101, "grad_norm": 0.06640625, "learning_rate": 0.0022791943764416114, "loss": 1.2768, "step": 4596 }, { "epoch": 0.4031190867997846, "grad_norm": 0.07861328125, "learning_rate": 0.0022788336490529777, "loss": 1.2224, "step": 4597 }, { "epoch": 0.40320677857415915, "grad_norm": 0.068359375, "learning_rate": 0.0022784728643163387, "loss": 1.2439, "step": 4598 }, { "epoch": 0.4032944703485337, "grad_norm": 0.0849609375, "learning_rate": 0.002278112022264597, "loss": 1.2556, "step": 4599 }, { "epoch": 0.4033821621229082, "grad_norm": 0.07763671875, "learning_rate": 0.0022777511229306597, "loss": 1.2159, "step": 4600 }, { "epoch": 0.4034698538972828, "grad_norm": 0.052001953125, "learning_rate": 0.0022773901663474413, "loss": 1.1735, "step": 4601 }, { "epoch": 0.40355754567165736, "grad_norm": 0.134765625, "learning_rate": 0.002277029152547859, "loss": 1.2198, "step": 4602 }, { "epoch": 0.40364523744603187, "grad_norm": 0.08251953125, "learning_rate": 0.002276668081564837, "loss": 1.2214, "step": 4603 }, { "epoch": 0.40373292922040643, "grad_norm": 0.06201171875, "learning_rate": 0.0022763069534313027, "loss": 1.2661, "step": 4604 }, { "epoch": 0.40382062099478094, "grad_norm": 0.08154296875, "learning_rate": 0.002275945768180192, "loss": 1.2777, "step": 4605 }, { "epoch": 0.4039083127691555, "grad_norm": 0.08203125, "learning_rate": 0.002275584525844443, "loss": 1.2763, "step": 4606 }, { "epoch": 0.4039960045435301, "grad_norm": 0.12451171875, "learning_rate": 0.0022752232264569997, "loss": 1.2953, "step": 4607 }, { "epoch": 0.4040836963179046, "grad_norm": 0.059326171875, "learning_rate": 0.0022748618700508126, "loss": 1.2095, "step": 4608 }, { "epoch": 0.40417138809227915, "grad_norm": 0.1103515625, "learning_rate": 0.002274500456658836, "loss": 1.239, "step": 4609 }, { "epoch": 0.4042590798666537, "grad_norm": 0.059326171875, "learning_rate": 0.0022741389863140293, "loss": 1.2036, "step": 4610 }, { "epoch": 0.4043467716410282, "grad_norm": 0.12109375, "learning_rate": 0.002273777459049359, "loss": 1.1731, "step": 4611 }, { "epoch": 0.4044344634154028, "grad_norm": 0.1015625, "learning_rate": 0.0022734158748977944, "loss": 1.2125, "step": 4612 }, { "epoch": 0.40452215518977735, "grad_norm": 0.1806640625, "learning_rate": 0.0022730542338923114, "loss": 1.2531, "step": 4613 }, { "epoch": 0.40460984696415186, "grad_norm": 0.057861328125, "learning_rate": 0.0022726925360658902, "loss": 1.2051, "step": 4614 }, { "epoch": 0.40469753873852643, "grad_norm": 0.11572265625, "learning_rate": 0.0022723307814515184, "loss": 1.192, "step": 4615 }, { "epoch": 0.40478523051290094, "grad_norm": 0.1103515625, "learning_rate": 0.0022719689700821852, "loss": 1.1954, "step": 4616 }, { "epoch": 0.4048729222872755, "grad_norm": 0.12060546875, "learning_rate": 0.0022716071019908872, "loss": 1.2465, "step": 4617 }, { "epoch": 0.40496061406165007, "grad_norm": 0.1484375, "learning_rate": 0.0022712451772106262, "loss": 1.2778, "step": 4618 }, { "epoch": 0.4050483058360246, "grad_norm": 0.1240234375, "learning_rate": 0.002270883195774409, "loss": 1.2833, "step": 4619 }, { "epoch": 0.40513599761039915, "grad_norm": 0.0791015625, "learning_rate": 0.002270521157715247, "loss": 1.2269, "step": 4620 }, { "epoch": 0.4052236893847737, "grad_norm": 0.0966796875, "learning_rate": 0.0022701590630661577, "loss": 1.1917, "step": 4621 }, { "epoch": 0.4053113811591482, "grad_norm": 0.052734375, "learning_rate": 0.0022697969118601626, "loss": 1.1955, "step": 4622 }, { "epoch": 0.4053990729335228, "grad_norm": 0.0810546875, "learning_rate": 0.0022694347041302887, "loss": 1.2471, "step": 4623 }, { "epoch": 0.40548676470789735, "grad_norm": 0.052001953125, "learning_rate": 0.0022690724399095693, "loss": 1.2755, "step": 4624 }, { "epoch": 0.40557445648227186, "grad_norm": 0.072265625, "learning_rate": 0.0022687101192310414, "loss": 1.2249, "step": 4625 }, { "epoch": 0.4056621482566464, "grad_norm": 0.056640625, "learning_rate": 0.0022683477421277477, "loss": 1.3006, "step": 4626 }, { "epoch": 0.405749840031021, "grad_norm": 0.07763671875, "learning_rate": 0.0022679853086327363, "loss": 1.2737, "step": 4627 }, { "epoch": 0.4058375318053955, "grad_norm": 0.0634765625, "learning_rate": 0.00226762281877906, "loss": 1.1736, "step": 4628 }, { "epoch": 0.40592522357977007, "grad_norm": 0.0830078125, "learning_rate": 0.002267260272599777, "loss": 1.2439, "step": 4629 }, { "epoch": 0.4060129153541446, "grad_norm": 0.06103515625, "learning_rate": 0.002266897670127951, "loss": 1.2296, "step": 4630 }, { "epoch": 0.40610060712851914, "grad_norm": 0.08544921875, "learning_rate": 0.0022665350113966498, "loss": 1.2521, "step": 4631 }, { "epoch": 0.4061882989028937, "grad_norm": 0.0908203125, "learning_rate": 0.0022661722964389466, "loss": 1.1711, "step": 4632 }, { "epoch": 0.4062759906772682, "grad_norm": 0.068359375, "learning_rate": 0.0022658095252879214, "loss": 1.2059, "step": 4633 }, { "epoch": 0.4063636824516428, "grad_norm": 0.05859375, "learning_rate": 0.0022654466979766565, "loss": 1.1985, "step": 4634 }, { "epoch": 0.40645137422601735, "grad_norm": 0.061767578125, "learning_rate": 0.0022650838145382417, "loss": 1.1845, "step": 4635 }, { "epoch": 0.40653906600039186, "grad_norm": 0.0810546875, "learning_rate": 0.002264720875005771, "loss": 1.2728, "step": 4636 }, { "epoch": 0.4066267577747664, "grad_norm": 0.1015625, "learning_rate": 0.0022643578794123436, "loss": 1.247, "step": 4637 }, { "epoch": 0.406714449549141, "grad_norm": 0.07861328125, "learning_rate": 0.0022639948277910628, "loss": 1.2601, "step": 4638 }, { "epoch": 0.4068021413235155, "grad_norm": 0.05712890625, "learning_rate": 0.0022636317201750397, "loss": 1.2293, "step": 4639 }, { "epoch": 0.40688983309789006, "grad_norm": 0.068359375, "learning_rate": 0.0022632685565973872, "loss": 1.1542, "step": 4640 }, { "epoch": 0.4069775248722646, "grad_norm": 0.057861328125, "learning_rate": 0.0022629053370912255, "loss": 1.1651, "step": 4641 }, { "epoch": 0.40706521664663914, "grad_norm": 0.08349609375, "learning_rate": 0.002262542061689679, "loss": 1.2381, "step": 4642 }, { "epoch": 0.4071529084210137, "grad_norm": 0.08154296875, "learning_rate": 0.0022621787304258783, "loss": 1.2756, "step": 4643 }, { "epoch": 0.4072406001953882, "grad_norm": 0.0478515625, "learning_rate": 0.0022618153433329575, "loss": 1.262, "step": 4644 }, { "epoch": 0.4073282919697628, "grad_norm": 0.0859375, "learning_rate": 0.0022614519004440573, "loss": 1.2373, "step": 4645 }, { "epoch": 0.40741598374413734, "grad_norm": 0.0703125, "learning_rate": 0.0022610884017923217, "loss": 1.217, "step": 4646 }, { "epoch": 0.40750367551851185, "grad_norm": 0.057373046875, "learning_rate": 0.002260724847410901, "loss": 1.2484, "step": 4647 }, { "epoch": 0.4075913672928864, "grad_norm": 0.07080078125, "learning_rate": 0.0022603612373329517, "loss": 1.2387, "step": 4648 }, { "epoch": 0.407679059067261, "grad_norm": 0.055419921875, "learning_rate": 0.0022599975715916327, "loss": 1.2224, "step": 4649 }, { "epoch": 0.4077667508416355, "grad_norm": 0.06201171875, "learning_rate": 0.00225963385022011, "loss": 1.2563, "step": 4650 }, { "epoch": 0.40785444261601006, "grad_norm": 0.061279296875, "learning_rate": 0.002259270073251553, "loss": 1.1949, "step": 4651 }, { "epoch": 0.40794213439038457, "grad_norm": 0.09814453125, "learning_rate": 0.0022589062407191397, "loss": 1.2432, "step": 4652 }, { "epoch": 0.40802982616475914, "grad_norm": 0.060791015625, "learning_rate": 0.002258542352656048, "loss": 1.2266, "step": 4653 }, { "epoch": 0.4081175179391337, "grad_norm": 0.08447265625, "learning_rate": 0.0022581784090954654, "loss": 1.1902, "step": 4654 }, { "epoch": 0.4082052097135082, "grad_norm": 0.05810546875, "learning_rate": 0.002257814410070581, "loss": 1.2522, "step": 4655 }, { "epoch": 0.4082929014878828, "grad_norm": 0.0927734375, "learning_rate": 0.0022574503556145923, "loss": 1.2303, "step": 4656 }, { "epoch": 0.40838059326225734, "grad_norm": 0.06787109375, "learning_rate": 0.0022570862457606993, "loss": 1.2545, "step": 4657 }, { "epoch": 0.40846828503663185, "grad_norm": 0.0673828125, "learning_rate": 0.0022567220805421073, "loss": 1.2198, "step": 4658 }, { "epoch": 0.4085559768110064, "grad_norm": 0.0537109375, "learning_rate": 0.0022563578599920277, "loss": 1.2331, "step": 4659 }, { "epoch": 0.408643668585381, "grad_norm": 0.08056640625, "learning_rate": 0.002255993584143677, "loss": 1.2061, "step": 4660 }, { "epoch": 0.4087313603597555, "grad_norm": 0.07080078125, "learning_rate": 0.002255629253030276, "loss": 1.1709, "step": 4661 }, { "epoch": 0.40881905213413006, "grad_norm": 0.171875, "learning_rate": 0.00225526486668505, "loss": 1.2112, "step": 4662 }, { "epoch": 0.4089067439085046, "grad_norm": 0.06103515625, "learning_rate": 0.0022549004251412315, "loss": 1.2593, "step": 4663 }, { "epoch": 0.40899443568287913, "grad_norm": 0.1318359375, "learning_rate": 0.0022545359284320554, "loss": 1.2678, "step": 4664 }, { "epoch": 0.4090821274572537, "grad_norm": 0.0625, "learning_rate": 0.0022541713765907626, "loss": 1.297, "step": 4665 }, { "epoch": 0.4091698192316282, "grad_norm": 0.05908203125, "learning_rate": 0.0022538067696506003, "loss": 1.2357, "step": 4666 }, { "epoch": 0.40925751100600277, "grad_norm": 0.06787109375, "learning_rate": 0.0022534421076448197, "loss": 1.1663, "step": 4667 }, { "epoch": 0.40934520278037734, "grad_norm": 0.0625, "learning_rate": 0.0022530773906066764, "loss": 1.2798, "step": 4668 }, { "epoch": 0.40943289455475185, "grad_norm": 0.05810546875, "learning_rate": 0.0022527126185694314, "loss": 1.2627, "step": 4669 }, { "epoch": 0.4095205863291264, "grad_norm": 0.0615234375, "learning_rate": 0.002252347791566352, "loss": 1.2095, "step": 4670 }, { "epoch": 0.409608278103501, "grad_norm": 0.0703125, "learning_rate": 0.002251982909630709, "loss": 1.2243, "step": 4671 }, { "epoch": 0.4096959698778755, "grad_norm": 0.052734375, "learning_rate": 0.0022516179727957784, "loss": 1.1971, "step": 4672 }, { "epoch": 0.40978366165225005, "grad_norm": 0.0634765625, "learning_rate": 0.002251252981094842, "loss": 1.2483, "step": 4673 }, { "epoch": 0.4098713534266246, "grad_norm": 0.078125, "learning_rate": 0.002250887934561186, "loss": 1.2584, "step": 4674 }, { "epoch": 0.40995904520099913, "grad_norm": 0.07421875, "learning_rate": 0.002250522833228101, "loss": 1.2149, "step": 4675 }, { "epoch": 0.4100467369753737, "grad_norm": 0.061279296875, "learning_rate": 0.002250157677128884, "loss": 1.1932, "step": 4676 }, { "epoch": 0.4101344287497482, "grad_norm": 0.07373046875, "learning_rate": 0.0022497924662968362, "loss": 1.1947, "step": 4677 }, { "epoch": 0.41022212052412277, "grad_norm": 0.06201171875, "learning_rate": 0.002249427200765264, "loss": 1.2329, "step": 4678 }, { "epoch": 0.41030981229849733, "grad_norm": 0.064453125, "learning_rate": 0.0022490618805674784, "loss": 1.2202, "step": 4679 }, { "epoch": 0.41039750407287184, "grad_norm": 0.06103515625, "learning_rate": 0.002248696505736796, "loss": 1.2431, "step": 4680 }, { "epoch": 0.4104851958472464, "grad_norm": 0.072265625, "learning_rate": 0.0022483310763065376, "loss": 1.2301, "step": 4681 }, { "epoch": 0.410572887621621, "grad_norm": 0.107421875, "learning_rate": 0.00224796559231003, "loss": 1.223, "step": 4682 }, { "epoch": 0.4106605793959955, "grad_norm": 0.08837890625, "learning_rate": 0.0022476000537806034, "loss": 1.2496, "step": 4683 }, { "epoch": 0.41074827117037005, "grad_norm": 0.0634765625, "learning_rate": 0.0022472344607515947, "loss": 1.1825, "step": 4684 }, { "epoch": 0.4108359629447446, "grad_norm": 0.0830078125, "learning_rate": 0.002246868813256345, "loss": 1.2002, "step": 4685 }, { "epoch": 0.4109236547191191, "grad_norm": 0.0810546875, "learning_rate": 0.0022465031113282005, "loss": 1.2458, "step": 4686 }, { "epoch": 0.4110113464934937, "grad_norm": 0.0751953125, "learning_rate": 0.002246137355000512, "loss": 1.1966, "step": 4687 }, { "epoch": 0.4110990382678682, "grad_norm": 0.1279296875, "learning_rate": 0.002245771544306636, "loss": 1.1991, "step": 4688 }, { "epoch": 0.41118673004224277, "grad_norm": 0.06005859375, "learning_rate": 0.002245405679279932, "loss": 1.2187, "step": 4689 }, { "epoch": 0.41127442181661733, "grad_norm": 0.09765625, "learning_rate": 0.002245039759953768, "loss": 1.2433, "step": 4690 }, { "epoch": 0.41136211359099184, "grad_norm": 0.06396484375, "learning_rate": 0.0022446737863615145, "loss": 1.1845, "step": 4691 }, { "epoch": 0.4114498053653664, "grad_norm": 0.115234375, "learning_rate": 0.002244307758536546, "loss": 1.2094, "step": 4692 }, { "epoch": 0.41153749713974097, "grad_norm": 0.057373046875, "learning_rate": 0.002243941676512244, "loss": 1.1919, "step": 4693 }, { "epoch": 0.4116251889141155, "grad_norm": 0.07470703125, "learning_rate": 0.002243575540321995, "loss": 1.2175, "step": 4694 }, { "epoch": 0.41171288068849005, "grad_norm": 0.07373046875, "learning_rate": 0.002243209349999189, "loss": 1.2262, "step": 4695 }, { "epoch": 0.4118005724628646, "grad_norm": 0.0517578125, "learning_rate": 0.002242843105577221, "loss": 1.1616, "step": 4696 }, { "epoch": 0.4118882642372391, "grad_norm": 0.05615234375, "learning_rate": 0.0022424768070894922, "loss": 1.2157, "step": 4697 }, { "epoch": 0.4119759560116137, "grad_norm": 0.080078125, "learning_rate": 0.0022421104545694084, "loss": 1.2278, "step": 4698 }, { "epoch": 0.41206364778598825, "grad_norm": 0.061767578125, "learning_rate": 0.00224174404805038, "loss": 1.2186, "step": 4699 }, { "epoch": 0.41215133956036276, "grad_norm": 0.054443359375, "learning_rate": 0.0022413775875658216, "loss": 1.2142, "step": 4700 }, { "epoch": 0.4122390313347373, "grad_norm": 0.06298828125, "learning_rate": 0.0022410110731491536, "loss": 1.2073, "step": 4701 }, { "epoch": 0.41232672310911184, "grad_norm": 0.06591796875, "learning_rate": 0.0022406445048338015, "loss": 1.15, "step": 4702 }, { "epoch": 0.4124144148834864, "grad_norm": 0.05712890625, "learning_rate": 0.002240277882653196, "loss": 1.2157, "step": 4703 }, { "epoch": 0.41250210665786097, "grad_norm": 0.0693359375, "learning_rate": 0.0022399112066407707, "loss": 1.2201, "step": 4704 }, { "epoch": 0.4125897984322355, "grad_norm": 0.06103515625, "learning_rate": 0.0022395444768299666, "loss": 1.2146, "step": 4705 }, { "epoch": 0.41267749020661004, "grad_norm": 0.059814453125, "learning_rate": 0.0022391776932542276, "loss": 1.2705, "step": 4706 }, { "epoch": 0.4127651819809846, "grad_norm": 0.06884765625, "learning_rate": 0.002238810855947004, "loss": 1.2639, "step": 4707 }, { "epoch": 0.4128528737553591, "grad_norm": 0.1484375, "learning_rate": 0.002238443964941751, "loss": 1.2706, "step": 4708 }, { "epoch": 0.4129405655297337, "grad_norm": 0.0556640625, "learning_rate": 0.002238077020271927, "loss": 1.2046, "step": 4709 }, { "epoch": 0.41302825730410825, "grad_norm": 0.1474609375, "learning_rate": 0.002237710021970997, "loss": 1.2319, "step": 4710 }, { "epoch": 0.41311594907848276, "grad_norm": 0.055419921875, "learning_rate": 0.00223734297007243, "loss": 1.1681, "step": 4711 }, { "epoch": 0.4132036408528573, "grad_norm": 0.09912109375, "learning_rate": 0.0022369758646097007, "loss": 1.1414, "step": 4712 }, { "epoch": 0.41329133262723183, "grad_norm": 0.07421875, "learning_rate": 0.002236608705616287, "loss": 1.2719, "step": 4713 }, { "epoch": 0.4133790244016064, "grad_norm": 0.1328125, "learning_rate": 0.0022362414931256748, "loss": 1.2135, "step": 4714 }, { "epoch": 0.41346671617598096, "grad_norm": 0.09521484375, "learning_rate": 0.002235874227171351, "loss": 1.1892, "step": 4715 }, { "epoch": 0.4135544079503555, "grad_norm": 0.10400390625, "learning_rate": 0.0022355069077868105, "loss": 1.2861, "step": 4716 }, { "epoch": 0.41364209972473004, "grad_norm": 0.11474609375, "learning_rate": 0.0022351395350055513, "loss": 1.2386, "step": 4717 }, { "epoch": 0.4137297914991046, "grad_norm": 0.05615234375, "learning_rate": 0.002234772108861077, "loss": 1.2132, "step": 4718 }, { "epoch": 0.4138174832734791, "grad_norm": 0.0732421875, "learning_rate": 0.002234404629386896, "loss": 1.235, "step": 4719 }, { "epoch": 0.4139051750478537, "grad_norm": 0.06640625, "learning_rate": 0.002234037096616521, "loss": 1.2373, "step": 4720 }, { "epoch": 0.41399286682222824, "grad_norm": 0.06298828125, "learning_rate": 0.0022336695105834715, "loss": 1.2015, "step": 4721 }, { "epoch": 0.41408055859660275, "grad_norm": 0.08056640625, "learning_rate": 0.0022333018713212686, "loss": 1.2112, "step": 4722 }, { "epoch": 0.4141682503709773, "grad_norm": 0.068359375, "learning_rate": 0.002232934178863441, "loss": 1.2003, "step": 4723 }, { "epoch": 0.41425594214535183, "grad_norm": 0.10888671875, "learning_rate": 0.0022325664332435214, "loss": 1.2657, "step": 4724 }, { "epoch": 0.4143436339197264, "grad_norm": 0.1142578125, "learning_rate": 0.0022321986344950467, "loss": 1.213, "step": 4725 }, { "epoch": 0.41443132569410096, "grad_norm": 0.095703125, "learning_rate": 0.0022318307826515596, "loss": 1.2476, "step": 4726 }, { "epoch": 0.41451901746847547, "grad_norm": 0.1513671875, "learning_rate": 0.0022314628777466068, "loss": 1.2404, "step": 4727 }, { "epoch": 0.41460670924285004, "grad_norm": 0.068359375, "learning_rate": 0.0022310949198137407, "loss": 1.2238, "step": 4728 }, { "epoch": 0.4146944010172246, "grad_norm": 0.1025390625, "learning_rate": 0.002230726908886518, "loss": 1.2164, "step": 4729 }, { "epoch": 0.4147820927915991, "grad_norm": 0.07421875, "learning_rate": 0.0022303588449985, "loss": 1.2144, "step": 4730 }, { "epoch": 0.4148697845659737, "grad_norm": 0.07470703125, "learning_rate": 0.0022299907281832544, "loss": 1.204, "step": 4731 }, { "epoch": 0.41495747634034824, "grad_norm": 0.07763671875, "learning_rate": 0.002229622558474351, "loss": 1.219, "step": 4732 }, { "epoch": 0.41504516811472275, "grad_norm": 0.0771484375, "learning_rate": 0.0022292543359053673, "loss": 1.2276, "step": 4733 }, { "epoch": 0.4151328598890973, "grad_norm": 0.08447265625, "learning_rate": 0.0022288860605098825, "loss": 1.169, "step": 4734 }, { "epoch": 0.4152205516634719, "grad_norm": 0.060546875, "learning_rate": 0.0022285177323214836, "loss": 1.2027, "step": 4735 }, { "epoch": 0.4153082434378464, "grad_norm": 0.09716796875, "learning_rate": 0.0022281493513737613, "loss": 1.2505, "step": 4736 }, { "epoch": 0.41539593521222096, "grad_norm": 0.0703125, "learning_rate": 0.0022277809177003102, "loss": 1.2443, "step": 4737 }, { "epoch": 0.41548362698659547, "grad_norm": 0.1025390625, "learning_rate": 0.0022274124313347316, "loss": 1.2, "step": 4738 }, { "epoch": 0.41557131876097003, "grad_norm": 0.06982421875, "learning_rate": 0.002227043892310629, "loss": 1.2365, "step": 4739 }, { "epoch": 0.4156590105353446, "grad_norm": 0.1328125, "learning_rate": 0.002226675300661614, "loss": 1.1873, "step": 4740 }, { "epoch": 0.4157467023097191, "grad_norm": 0.099609375, "learning_rate": 0.0022263066564212996, "loss": 1.172, "step": 4741 }, { "epoch": 0.4158343940840937, "grad_norm": 0.1318359375, "learning_rate": 0.0022259379596233065, "loss": 1.2529, "step": 4742 }, { "epoch": 0.41592208585846824, "grad_norm": 0.08056640625, "learning_rate": 0.0022255692103012575, "loss": 1.2723, "step": 4743 }, { "epoch": 0.41600977763284275, "grad_norm": 0.0947265625, "learning_rate": 0.0022252004084887824, "loss": 1.2353, "step": 4744 }, { "epoch": 0.4160974694072173, "grad_norm": 0.0810546875, "learning_rate": 0.0022248315542195154, "loss": 1.2535, "step": 4745 }, { "epoch": 0.4161851611815919, "grad_norm": 0.0791015625, "learning_rate": 0.002224462647527094, "loss": 1.2202, "step": 4746 }, { "epoch": 0.4162728529559664, "grad_norm": 0.1279296875, "learning_rate": 0.0022240936884451628, "loss": 1.245, "step": 4747 }, { "epoch": 0.41636054473034095, "grad_norm": 0.05810546875, "learning_rate": 0.002223724677007369, "loss": 1.2404, "step": 4748 }, { "epoch": 0.41644823650471546, "grad_norm": 0.10302734375, "learning_rate": 0.0022233556132473654, "loss": 1.1987, "step": 4749 }, { "epoch": 0.41653592827909003, "grad_norm": 0.1005859375, "learning_rate": 0.0022229864971988105, "loss": 1.1704, "step": 4750 }, { "epoch": 0.4166236200534646, "grad_norm": 0.060546875, "learning_rate": 0.002222617328895366, "loss": 1.2269, "step": 4751 }, { "epoch": 0.4167113118278391, "grad_norm": 0.13671875, "learning_rate": 0.002222248108370699, "loss": 1.2167, "step": 4752 }, { "epoch": 0.41679900360221367, "grad_norm": 0.0654296875, "learning_rate": 0.0022218788356584817, "loss": 1.2698, "step": 4753 }, { "epoch": 0.41688669537658823, "grad_norm": 0.1015625, "learning_rate": 0.002221509510792391, "loss": 1.1682, "step": 4754 }, { "epoch": 0.41697438715096274, "grad_norm": 0.11328125, "learning_rate": 0.002221140133806109, "loss": 1.1864, "step": 4755 }, { "epoch": 0.4170620789253373, "grad_norm": 0.07568359375, "learning_rate": 0.0022207707047333203, "loss": 1.2322, "step": 4756 }, { "epoch": 0.4171497706997119, "grad_norm": 0.1171875, "learning_rate": 0.0022204012236077173, "loss": 1.2395, "step": 4757 }, { "epoch": 0.4172374624740864, "grad_norm": 0.08203125, "learning_rate": 0.002220031690462995, "loss": 1.1871, "step": 4758 }, { "epoch": 0.41732515424846095, "grad_norm": 0.08349609375, "learning_rate": 0.0022196621053328543, "loss": 1.1551, "step": 4759 }, { "epoch": 0.41741284602283546, "grad_norm": 0.1103515625, "learning_rate": 0.0022192924682509995, "loss": 1.239, "step": 4760 }, { "epoch": 0.41750053779721, "grad_norm": 0.06591796875, "learning_rate": 0.002218922779251142, "loss": 1.2173, "step": 4761 }, { "epoch": 0.4175882295715846, "grad_norm": 0.0947265625, "learning_rate": 0.0022185530383669948, "loss": 1.2376, "step": 4762 }, { "epoch": 0.4176759213459591, "grad_norm": 0.0751953125, "learning_rate": 0.002218183245632279, "loss": 1.1975, "step": 4763 }, { "epoch": 0.41776361312033367, "grad_norm": 0.0859375, "learning_rate": 0.0022178134010807176, "loss": 1.232, "step": 4764 }, { "epoch": 0.41785130489470823, "grad_norm": 0.0947265625, "learning_rate": 0.00221744350474604, "loss": 1.2004, "step": 4765 }, { "epoch": 0.41793899666908274, "grad_norm": 0.0810546875, "learning_rate": 0.0022170735566619795, "loss": 1.2265, "step": 4766 }, { "epoch": 0.4180266884434573, "grad_norm": 0.0966796875, "learning_rate": 0.0022167035568622746, "loss": 1.2558, "step": 4767 }, { "epoch": 0.41811438021783187, "grad_norm": 0.05517578125, "learning_rate": 0.002216333505380668, "loss": 1.2077, "step": 4768 }, { "epoch": 0.4182020719922064, "grad_norm": 0.078125, "learning_rate": 0.002215963402250908, "loss": 1.221, "step": 4769 }, { "epoch": 0.41828976376658095, "grad_norm": 0.06787109375, "learning_rate": 0.0022155932475067465, "loss": 1.2083, "step": 4770 }, { "epoch": 0.4183774555409555, "grad_norm": 0.06494140625, "learning_rate": 0.0022152230411819408, "loss": 1.2636, "step": 4771 }, { "epoch": 0.41846514731533, "grad_norm": 0.0703125, "learning_rate": 0.0022148527833102533, "loss": 1.2007, "step": 4772 }, { "epoch": 0.4185528390897046, "grad_norm": 0.068359375, "learning_rate": 0.0022144824739254495, "loss": 1.3035, "step": 4773 }, { "epoch": 0.4186405308640791, "grad_norm": 0.0888671875, "learning_rate": 0.002214112113061302, "loss": 1.2902, "step": 4774 }, { "epoch": 0.41872822263845366, "grad_norm": 0.07666015625, "learning_rate": 0.002213741700751586, "loss": 1.2631, "step": 4775 }, { "epoch": 0.4188159144128282, "grad_norm": 0.0869140625, "learning_rate": 0.002213371237030082, "loss": 1.2837, "step": 4776 }, { "epoch": 0.41890360618720274, "grad_norm": 0.10546875, "learning_rate": 0.0022130007219305753, "loss": 1.3015, "step": 4777 }, { "epoch": 0.4189912979615773, "grad_norm": 0.06787109375, "learning_rate": 0.0022126301554868566, "loss": 1.2507, "step": 4778 }, { "epoch": 0.41907898973595187, "grad_norm": 0.06396484375, "learning_rate": 0.0022122595377327193, "loss": 1.1962, "step": 4779 }, { "epoch": 0.4191666815103264, "grad_norm": 0.07666015625, "learning_rate": 0.0022118888687019645, "loss": 1.2078, "step": 4780 }, { "epoch": 0.41925437328470094, "grad_norm": 0.062255859375, "learning_rate": 0.0022115181484283946, "loss": 1.1823, "step": 4781 }, { "epoch": 0.4193420650590755, "grad_norm": 0.060546875, "learning_rate": 0.0022111473769458197, "loss": 1.249, "step": 4782 }, { "epoch": 0.41942975683345, "grad_norm": 0.06982421875, "learning_rate": 0.0022107765542880528, "loss": 1.2017, "step": 4783 }, { "epoch": 0.4195174486078246, "grad_norm": 0.058837890625, "learning_rate": 0.002210405680488911, "loss": 1.2329, "step": 4784 }, { "epoch": 0.4196051403821991, "grad_norm": 0.05419921875, "learning_rate": 0.002210034755582219, "loss": 1.3111, "step": 4785 }, { "epoch": 0.41969283215657366, "grad_norm": 0.0888671875, "learning_rate": 0.0022096637796018017, "loss": 1.2326, "step": 4786 }, { "epoch": 0.4197805239309482, "grad_norm": 0.05126953125, "learning_rate": 0.002209292752581493, "loss": 1.2073, "step": 4787 }, { "epoch": 0.41986821570532273, "grad_norm": 0.125, "learning_rate": 0.0022089216745551287, "loss": 1.2341, "step": 4788 }, { "epoch": 0.4199559074796973, "grad_norm": 0.052490234375, "learning_rate": 0.0022085505455565514, "loss": 1.207, "step": 4789 }, { "epoch": 0.42004359925407186, "grad_norm": 0.10986328125, "learning_rate": 0.0022081793656196056, "loss": 1.2369, "step": 4790 }, { "epoch": 0.4201312910284464, "grad_norm": 0.05126953125, "learning_rate": 0.002207808134778143, "loss": 1.1998, "step": 4791 }, { "epoch": 0.42021898280282094, "grad_norm": 0.08203125, "learning_rate": 0.002207436853066018, "loss": 1.1889, "step": 4792 }, { "epoch": 0.4203066745771955, "grad_norm": 0.05615234375, "learning_rate": 0.0022070655205170915, "loss": 1.2022, "step": 4793 }, { "epoch": 0.42039436635157, "grad_norm": 0.07666015625, "learning_rate": 0.0022066941371652275, "loss": 1.2365, "step": 4794 }, { "epoch": 0.4204820581259446, "grad_norm": 0.06689453125, "learning_rate": 0.002206322703044295, "loss": 1.2166, "step": 4795 }, { "epoch": 0.4205697499003191, "grad_norm": 0.08203125, "learning_rate": 0.0022059512181881683, "loss": 1.244, "step": 4796 }, { "epoch": 0.42065744167469366, "grad_norm": 0.0576171875, "learning_rate": 0.0022055796826307254, "loss": 1.212, "step": 4797 }, { "epoch": 0.4207451334490682, "grad_norm": 0.10791015625, "learning_rate": 0.00220520809640585, "loss": 1.2015, "step": 4798 }, { "epoch": 0.42083282522344273, "grad_norm": 0.05419921875, "learning_rate": 0.002204836459547429, "loss": 1.2429, "step": 4799 }, { "epoch": 0.4209205169978173, "grad_norm": 0.099609375, "learning_rate": 0.0022044647720893557, "loss": 1.2214, "step": 4800 }, { "epoch": 0.42100820877219186, "grad_norm": 0.061767578125, "learning_rate": 0.0022040930340655267, "loss": 1.2678, "step": 4801 }, { "epoch": 0.42109590054656637, "grad_norm": 0.09619140625, "learning_rate": 0.002203721245509843, "loss": 1.2271, "step": 4802 }, { "epoch": 0.42118359232094094, "grad_norm": 0.05078125, "learning_rate": 0.002203349406456211, "loss": 1.2025, "step": 4803 }, { "epoch": 0.4212712840953155, "grad_norm": 0.11962890625, "learning_rate": 0.002202977516938542, "loss": 1.227, "step": 4804 }, { "epoch": 0.42135897586969, "grad_norm": 0.05419921875, "learning_rate": 0.002202605576990751, "loss": 1.2268, "step": 4805 }, { "epoch": 0.4214466676440646, "grad_norm": 0.11669921875, "learning_rate": 0.0022022335866467573, "loss": 1.1856, "step": 4806 }, { "epoch": 0.4215343594184391, "grad_norm": 0.06005859375, "learning_rate": 0.0022018615459404867, "loss": 1.1973, "step": 4807 }, { "epoch": 0.42162205119281365, "grad_norm": 0.07275390625, "learning_rate": 0.0022014894549058674, "loss": 1.196, "step": 4808 }, { "epoch": 0.4217097429671882, "grad_norm": 0.057373046875, "learning_rate": 0.0022011173135768336, "loss": 1.2816, "step": 4809 }, { "epoch": 0.4217974347415627, "grad_norm": 0.05908203125, "learning_rate": 0.0022007451219873235, "loss": 1.2259, "step": 4810 }, { "epoch": 0.4218851265159373, "grad_norm": 0.05810546875, "learning_rate": 0.00220037288017128, "loss": 1.2303, "step": 4811 }, { "epoch": 0.42197281829031186, "grad_norm": 0.051513671875, "learning_rate": 0.0022000005881626507, "loss": 1.2383, "step": 4812 }, { "epoch": 0.42206051006468637, "grad_norm": 0.054443359375, "learning_rate": 0.002199628245995387, "loss": 1.2477, "step": 4813 }, { "epoch": 0.42214820183906093, "grad_norm": 0.060546875, "learning_rate": 0.002199255853703447, "loss": 1.2207, "step": 4814 }, { "epoch": 0.4222358936134355, "grad_norm": 0.0615234375, "learning_rate": 0.002198883411320791, "loss": 1.2677, "step": 4815 }, { "epoch": 0.42232358538781, "grad_norm": 0.09814453125, "learning_rate": 0.002198510918881384, "loss": 1.2463, "step": 4816 }, { "epoch": 0.4224112771621846, "grad_norm": 0.060791015625, "learning_rate": 0.0021981383764191975, "loss": 1.1822, "step": 4817 }, { "epoch": 0.42249896893655914, "grad_norm": 0.0849609375, "learning_rate": 0.0021977657839682067, "loss": 1.2551, "step": 4818 }, { "epoch": 0.42258666071093365, "grad_norm": 0.06982421875, "learning_rate": 0.0021973931415623896, "loss": 1.254, "step": 4819 }, { "epoch": 0.4226743524853082, "grad_norm": 0.08984375, "learning_rate": 0.0021970204492357316, "loss": 1.2169, "step": 4820 }, { "epoch": 0.4227620442596827, "grad_norm": 0.0556640625, "learning_rate": 0.0021966477070222206, "loss": 1.2236, "step": 4821 }, { "epoch": 0.4228497360340573, "grad_norm": 0.08251953125, "learning_rate": 0.00219627491495585, "loss": 1.2086, "step": 4822 }, { "epoch": 0.42293742780843185, "grad_norm": 0.056396484375, "learning_rate": 0.0021959020730706173, "loss": 1.1703, "step": 4823 }, { "epoch": 0.42302511958280636, "grad_norm": 0.0654296875, "learning_rate": 0.0021955291814005243, "loss": 1.2829, "step": 4824 }, { "epoch": 0.42311281135718093, "grad_norm": 0.055419921875, "learning_rate": 0.002195156239979579, "loss": 1.2168, "step": 4825 }, { "epoch": 0.4232005031315555, "grad_norm": 0.078125, "learning_rate": 0.0021947832488417914, "loss": 1.1778, "step": 4826 }, { "epoch": 0.42328819490593, "grad_norm": 0.07666015625, "learning_rate": 0.0021944102080211783, "loss": 1.1849, "step": 4827 }, { "epoch": 0.42337588668030457, "grad_norm": 0.06884765625, "learning_rate": 0.002194037117551759, "loss": 1.2066, "step": 4828 }, { "epoch": 0.42346357845467913, "grad_norm": 0.08349609375, "learning_rate": 0.00219366397746756, "loss": 1.2338, "step": 4829 }, { "epoch": 0.42355127022905364, "grad_norm": 0.0771484375, "learning_rate": 0.0021932907878026084, "loss": 1.2629, "step": 4830 }, { "epoch": 0.4236389620034282, "grad_norm": 0.09228515625, "learning_rate": 0.0021929175485909404, "loss": 1.2784, "step": 4831 }, { "epoch": 0.4237266537778027, "grad_norm": 0.10400390625, "learning_rate": 0.002192544259866593, "loss": 1.2166, "step": 4832 }, { "epoch": 0.4238143455521773, "grad_norm": 0.1259765625, "learning_rate": 0.0021921709216636102, "loss": 1.2514, "step": 4833 }, { "epoch": 0.42390203732655185, "grad_norm": 0.111328125, "learning_rate": 0.002191797534016039, "loss": 1.2579, "step": 4834 }, { "epoch": 0.42398972910092636, "grad_norm": 0.1484375, "learning_rate": 0.0021914240969579304, "loss": 1.2639, "step": 4835 }, { "epoch": 0.4240774208753009, "grad_norm": 0.083984375, "learning_rate": 0.0021910506105233428, "loss": 1.2446, "step": 4836 }, { "epoch": 0.4241651126496755, "grad_norm": 0.115234375, "learning_rate": 0.0021906770747463357, "loss": 1.1988, "step": 4837 }, { "epoch": 0.42425280442405, "grad_norm": 0.07373046875, "learning_rate": 0.002190303489660975, "loss": 1.2177, "step": 4838 }, { "epoch": 0.42434049619842457, "grad_norm": 0.12060546875, "learning_rate": 0.0021899298553013312, "loss": 1.29, "step": 4839 }, { "epoch": 0.42442818797279913, "grad_norm": 0.09130859375, "learning_rate": 0.0021895561717014786, "loss": 1.2352, "step": 4840 }, { "epoch": 0.42451587974717364, "grad_norm": 0.08447265625, "learning_rate": 0.0021891824388954955, "loss": 1.2595, "step": 4841 }, { "epoch": 0.4246035715215482, "grad_norm": 0.05859375, "learning_rate": 0.002188808656917466, "loss": 1.1421, "step": 4842 }, { "epoch": 0.4246912632959227, "grad_norm": 0.126953125, "learning_rate": 0.002188434825801478, "loss": 1.2621, "step": 4843 }, { "epoch": 0.4247789550702973, "grad_norm": 0.09130859375, "learning_rate": 0.002188060945581624, "loss": 1.2277, "step": 4844 }, { "epoch": 0.42486664684467185, "grad_norm": 0.103515625, "learning_rate": 0.002187687016292001, "loss": 1.2304, "step": 4845 }, { "epoch": 0.42495433861904636, "grad_norm": 0.0654296875, "learning_rate": 0.0021873130379667094, "loss": 1.1995, "step": 4846 }, { "epoch": 0.4250420303934209, "grad_norm": 0.11083984375, "learning_rate": 0.0021869390106398563, "loss": 1.2028, "step": 4847 }, { "epoch": 0.4251297221677955, "grad_norm": 0.062255859375, "learning_rate": 0.0021865649343455512, "loss": 1.2678, "step": 4848 }, { "epoch": 0.42521741394217, "grad_norm": 0.1123046875, "learning_rate": 0.0021861908091179096, "loss": 1.2464, "step": 4849 }, { "epoch": 0.42530510571654456, "grad_norm": 0.07958984375, "learning_rate": 0.00218581663499105, "loss": 1.2358, "step": 4850 }, { "epoch": 0.42539279749091913, "grad_norm": 0.09765625, "learning_rate": 0.0021854424119990973, "loss": 1.2669, "step": 4851 }, { "epoch": 0.42548048926529364, "grad_norm": 0.0927734375, "learning_rate": 0.0021850681401761785, "loss": 1.1968, "step": 4852 }, { "epoch": 0.4255681810396682, "grad_norm": 0.0869140625, "learning_rate": 0.0021846938195564255, "loss": 1.1624, "step": 4853 }, { "epoch": 0.42565587281404277, "grad_norm": 0.055419921875, "learning_rate": 0.0021843194501739776, "loss": 1.1645, "step": 4854 }, { "epoch": 0.4257435645884173, "grad_norm": 0.053955078125, "learning_rate": 0.0021839450320629747, "loss": 1.1355, "step": 4855 }, { "epoch": 0.42583125636279184, "grad_norm": 0.056884765625, "learning_rate": 0.002183570565257564, "loss": 1.2015, "step": 4856 }, { "epoch": 0.42591894813716635, "grad_norm": 0.053466796875, "learning_rate": 0.0021831960497918942, "loss": 1.2719, "step": 4857 }, { "epoch": 0.4260066399115409, "grad_norm": 0.06494140625, "learning_rate": 0.0021828214857001213, "loss": 1.2171, "step": 4858 }, { "epoch": 0.4260943316859155, "grad_norm": 0.0859375, "learning_rate": 0.002182446873016405, "loss": 1.1963, "step": 4859 }, { "epoch": 0.42618202346029, "grad_norm": 0.1357421875, "learning_rate": 0.0021820722117749077, "loss": 1.2053, "step": 4860 }, { "epoch": 0.42626971523466456, "grad_norm": 0.06689453125, "learning_rate": 0.002181697502009799, "loss": 1.2338, "step": 4861 }, { "epoch": 0.4263574070090391, "grad_norm": 0.1220703125, "learning_rate": 0.00218132274375525, "loss": 1.2224, "step": 4862 }, { "epoch": 0.42644509878341363, "grad_norm": 0.047607421875, "learning_rate": 0.0021809479370454386, "loss": 1.1971, "step": 4863 }, { "epoch": 0.4265327905577882, "grad_norm": 0.056640625, "learning_rate": 0.0021805730819145458, "loss": 1.2321, "step": 4864 }, { "epoch": 0.42662048233216276, "grad_norm": 0.05615234375, "learning_rate": 0.002180198178396758, "loss": 1.2385, "step": 4865 }, { "epoch": 0.4267081741065373, "grad_norm": 0.058349609375, "learning_rate": 0.0021798232265262643, "loss": 1.1536, "step": 4866 }, { "epoch": 0.42679586588091184, "grad_norm": 0.059814453125, "learning_rate": 0.0021794482263372606, "loss": 1.1818, "step": 4867 }, { "epoch": 0.42688355765528635, "grad_norm": 0.051025390625, "learning_rate": 0.0021790731778639455, "loss": 1.2383, "step": 4868 }, { "epoch": 0.4269712494296609, "grad_norm": 0.055908203125, "learning_rate": 0.0021786980811405224, "loss": 1.208, "step": 4869 }, { "epoch": 0.4270589412040355, "grad_norm": 0.0615234375, "learning_rate": 0.0021783229362011985, "loss": 1.227, "step": 4870 }, { "epoch": 0.42714663297841, "grad_norm": 0.053466796875, "learning_rate": 0.0021779477430801876, "loss": 1.1964, "step": 4871 }, { "epoch": 0.42723432475278456, "grad_norm": 0.068359375, "learning_rate": 0.0021775725018117046, "loss": 1.2354, "step": 4872 }, { "epoch": 0.4273220165271591, "grad_norm": 0.061279296875, "learning_rate": 0.002177197212429972, "loss": 1.1897, "step": 4873 }, { "epoch": 0.42740970830153363, "grad_norm": 0.0576171875, "learning_rate": 0.0021768218749692136, "loss": 1.2269, "step": 4874 }, { "epoch": 0.4274974000759082, "grad_norm": 0.05712890625, "learning_rate": 0.0021764464894636617, "loss": 1.2817, "step": 4875 }, { "epoch": 0.42758509185028276, "grad_norm": 0.0654296875, "learning_rate": 0.0021760710559475477, "loss": 1.2238, "step": 4876 }, { "epoch": 0.42767278362465727, "grad_norm": 0.095703125, "learning_rate": 0.002175695574455112, "loss": 1.1898, "step": 4877 }, { "epoch": 0.42776047539903184, "grad_norm": 0.05419921875, "learning_rate": 0.002175320045020597, "loss": 1.1799, "step": 4878 }, { "epoch": 0.42784816717340635, "grad_norm": 0.09423828125, "learning_rate": 0.0021749444676782497, "loss": 1.2378, "step": 4879 }, { "epoch": 0.4279358589477809, "grad_norm": 0.07568359375, "learning_rate": 0.002174568842462322, "loss": 1.2687, "step": 4880 }, { "epoch": 0.4280235507221555, "grad_norm": 0.056884765625, "learning_rate": 0.0021741931694070704, "loss": 1.301, "step": 4881 }, { "epoch": 0.42811124249653, "grad_norm": 0.06640625, "learning_rate": 0.002173817448546755, "loss": 1.2232, "step": 4882 }, { "epoch": 0.42819893427090455, "grad_norm": 0.064453125, "learning_rate": 0.00217344167991564, "loss": 1.2343, "step": 4883 }, { "epoch": 0.4282866260452791, "grad_norm": 0.07275390625, "learning_rate": 0.0021730658635479958, "loss": 1.1653, "step": 4884 }, { "epoch": 0.4283743178196536, "grad_norm": 0.06396484375, "learning_rate": 0.0021726899994780947, "loss": 1.3059, "step": 4885 }, { "epoch": 0.4284620095940282, "grad_norm": 0.0595703125, "learning_rate": 0.002172314087740215, "loss": 1.2348, "step": 4886 }, { "epoch": 0.42854970136840276, "grad_norm": 0.0927734375, "learning_rate": 0.002171938128368639, "loss": 1.2231, "step": 4887 }, { "epoch": 0.42863739314277727, "grad_norm": 0.06787109375, "learning_rate": 0.0021715621213976525, "loss": 1.213, "step": 4888 }, { "epoch": 0.42872508491715183, "grad_norm": 0.13671875, "learning_rate": 0.002171186066861548, "loss": 1.2369, "step": 4889 }, { "epoch": 0.4288127766915264, "grad_norm": 0.06396484375, "learning_rate": 0.0021708099647946184, "loss": 1.1831, "step": 4890 }, { "epoch": 0.4289004684659009, "grad_norm": 0.0791015625, "learning_rate": 0.0021704338152311654, "loss": 1.2283, "step": 4891 }, { "epoch": 0.4289881602402755, "grad_norm": 0.053955078125, "learning_rate": 0.002170057618205491, "loss": 1.2856, "step": 4892 }, { "epoch": 0.42907585201465, "grad_norm": 0.06591796875, "learning_rate": 0.002169681373751906, "loss": 1.2255, "step": 4893 }, { "epoch": 0.42916354378902455, "grad_norm": 0.061767578125, "learning_rate": 0.00216930508190472, "loss": 1.2946, "step": 4894 }, { "epoch": 0.4292512355633991, "grad_norm": 0.0927734375, "learning_rate": 0.0021689287426982506, "loss": 1.2196, "step": 4895 }, { "epoch": 0.4293389273377736, "grad_norm": 0.0556640625, "learning_rate": 0.0021685523561668207, "loss": 1.2519, "step": 4896 }, { "epoch": 0.4294266191121482, "grad_norm": 0.1640625, "learning_rate": 0.002168175922344754, "loss": 1.2562, "step": 4897 }, { "epoch": 0.42951431088652275, "grad_norm": 0.06005859375, "learning_rate": 0.002167799441266381, "loss": 1.2322, "step": 4898 }, { "epoch": 0.42960200266089726, "grad_norm": 0.12109375, "learning_rate": 0.002167422912966035, "loss": 1.2503, "step": 4899 }, { "epoch": 0.42968969443527183, "grad_norm": 0.0732421875, "learning_rate": 0.0021670463374780556, "loss": 1.2205, "step": 4900 }, { "epoch": 0.4297773862096464, "grad_norm": 0.130859375, "learning_rate": 0.002166669714836785, "loss": 1.2334, "step": 4901 }, { "epoch": 0.4298650779840209, "grad_norm": 0.07373046875, "learning_rate": 0.0021662930450765706, "loss": 1.2077, "step": 4902 }, { "epoch": 0.42995276975839547, "grad_norm": 0.1630859375, "learning_rate": 0.002165916328231763, "loss": 1.2177, "step": 4903 }, { "epoch": 0.43004046153277, "grad_norm": 0.054443359375, "learning_rate": 0.0021655395643367177, "loss": 1.2124, "step": 4904 }, { "epoch": 0.43012815330714455, "grad_norm": 0.1875, "learning_rate": 0.0021651627534257953, "loss": 1.2311, "step": 4905 }, { "epoch": 0.4302158450815191, "grad_norm": 0.0595703125, "learning_rate": 0.0021647858955333595, "loss": 1.1951, "step": 4906 }, { "epoch": 0.4303035368558936, "grad_norm": 0.12890625, "learning_rate": 0.002164408990693779, "loss": 1.1425, "step": 4907 }, { "epoch": 0.4303912286302682, "grad_norm": 0.123046875, "learning_rate": 0.0021640320389414262, "loss": 1.2391, "step": 4908 }, { "epoch": 0.43047892040464275, "grad_norm": 0.1513671875, "learning_rate": 0.002163655040310679, "loss": 1.1713, "step": 4909 }, { "epoch": 0.43056661217901726, "grad_norm": 0.130859375, "learning_rate": 0.002163277994835918, "loss": 1.2468, "step": 4910 }, { "epoch": 0.4306543039533918, "grad_norm": 0.138671875, "learning_rate": 0.002162900902551529, "loss": 1.2277, "step": 4911 }, { "epoch": 0.4307419957277664, "grad_norm": 0.126953125, "learning_rate": 0.002162523763491901, "loss": 1.1886, "step": 4912 }, { "epoch": 0.4308296875021409, "grad_norm": 0.1298828125, "learning_rate": 0.0021621465776914296, "loss": 1.2378, "step": 4913 }, { "epoch": 0.43091737927651547, "grad_norm": 0.12109375, "learning_rate": 0.0021617693451845125, "loss": 1.1662, "step": 4914 }, { "epoch": 0.43100507105089, "grad_norm": 0.053466796875, "learning_rate": 0.0021613920660055515, "loss": 1.1841, "step": 4915 }, { "epoch": 0.43109276282526454, "grad_norm": 0.09423828125, "learning_rate": 0.0021610147401889546, "loss": 1.2203, "step": 4916 }, { "epoch": 0.4311804545996391, "grad_norm": 0.0791015625, "learning_rate": 0.002160637367769133, "loss": 1.1976, "step": 4917 }, { "epoch": 0.4312681463740136, "grad_norm": 0.072265625, "learning_rate": 0.002160259948780501, "loss": 1.2167, "step": 4918 }, { "epoch": 0.4313558381483882, "grad_norm": 0.10986328125, "learning_rate": 0.0021598824832574793, "loss": 1.2444, "step": 4919 }, { "epoch": 0.43144352992276275, "grad_norm": 0.0888671875, "learning_rate": 0.0021595049712344913, "loss": 1.2692, "step": 4920 }, { "epoch": 0.43153122169713726, "grad_norm": 0.07470703125, "learning_rate": 0.002159127412745965, "loss": 1.2196, "step": 4921 }, { "epoch": 0.4316189134715118, "grad_norm": 0.07763671875, "learning_rate": 0.002158749807826333, "loss": 1.251, "step": 4922 }, { "epoch": 0.4317066052458864, "grad_norm": 0.051025390625, "learning_rate": 0.002158372156510032, "loss": 1.2342, "step": 4923 }, { "epoch": 0.4317942970202609, "grad_norm": 0.07275390625, "learning_rate": 0.0021579944588315026, "loss": 1.3067, "step": 4924 }, { "epoch": 0.43188198879463546, "grad_norm": 0.07177734375, "learning_rate": 0.00215761671482519, "loss": 1.2001, "step": 4925 }, { "epoch": 0.43196968056901003, "grad_norm": 0.062255859375, "learning_rate": 0.0021572389245255435, "loss": 1.2387, "step": 4926 }, { "epoch": 0.43205737234338454, "grad_norm": 0.0654296875, "learning_rate": 0.002156861087967017, "loss": 1.2048, "step": 4927 }, { "epoch": 0.4321450641177591, "grad_norm": 0.054443359375, "learning_rate": 0.002156483205184066, "loss": 1.1875, "step": 4928 }, { "epoch": 0.4322327558921336, "grad_norm": 0.0810546875, "learning_rate": 0.0021561052762111566, "loss": 1.2157, "step": 4929 }, { "epoch": 0.4323204476665082, "grad_norm": 0.0732421875, "learning_rate": 0.002155727301082751, "loss": 1.2844, "step": 4930 }, { "epoch": 0.43240813944088274, "grad_norm": 0.06689453125, "learning_rate": 0.002155349279833321, "loss": 1.135, "step": 4931 }, { "epoch": 0.43249583121525725, "grad_norm": 0.09033203125, "learning_rate": 0.0021549712124973417, "loss": 1.2108, "step": 4932 }, { "epoch": 0.4325835229896318, "grad_norm": 0.056640625, "learning_rate": 0.002154593099109291, "loss": 1.2304, "step": 4933 }, { "epoch": 0.4326712147640064, "grad_norm": 0.052490234375, "learning_rate": 0.002154214939703652, "loss": 1.1933, "step": 4934 }, { "epoch": 0.4327589065383809, "grad_norm": 0.08642578125, "learning_rate": 0.0021538367343149132, "loss": 1.1797, "step": 4935 }, { "epoch": 0.43284659831275546, "grad_norm": 0.054443359375, "learning_rate": 0.002153458482977564, "loss": 1.2104, "step": 4936 }, { "epoch": 0.43293429008713, "grad_norm": 0.06689453125, "learning_rate": 0.0021530801857261015, "loss": 1.2589, "step": 4937 }, { "epoch": 0.43302198186150453, "grad_norm": 0.05712890625, "learning_rate": 0.0021527018425950245, "loss": 1.1719, "step": 4938 }, { "epoch": 0.4331096736358791, "grad_norm": 0.064453125, "learning_rate": 0.0021523234536188368, "loss": 1.2358, "step": 4939 }, { "epoch": 0.4331973654102536, "grad_norm": 0.07470703125, "learning_rate": 0.002151945018832047, "loss": 1.2262, "step": 4940 }, { "epoch": 0.4332850571846282, "grad_norm": 0.06005859375, "learning_rate": 0.0021515665382691672, "loss": 1.236, "step": 4941 }, { "epoch": 0.43337274895900274, "grad_norm": 0.1083984375, "learning_rate": 0.002151188011964714, "loss": 1.2316, "step": 4942 }, { "epoch": 0.43346044073337725, "grad_norm": 0.057373046875, "learning_rate": 0.002150809439953208, "loss": 1.245, "step": 4943 }, { "epoch": 0.4335481325077518, "grad_norm": 0.1181640625, "learning_rate": 0.0021504308222691742, "loss": 1.2345, "step": 4944 }, { "epoch": 0.4336358242821264, "grad_norm": 0.05517578125, "learning_rate": 0.002150052158947141, "loss": 1.1722, "step": 4945 }, { "epoch": 0.4337235160565009, "grad_norm": 0.11083984375, "learning_rate": 0.0021496734500216414, "loss": 1.2195, "step": 4946 }, { "epoch": 0.43381120783087546, "grad_norm": 0.05908203125, "learning_rate": 0.002149294695527214, "loss": 1.1989, "step": 4947 }, { "epoch": 0.43389889960525, "grad_norm": 0.09716796875, "learning_rate": 0.0021489158954983985, "loss": 1.237, "step": 4948 }, { "epoch": 0.43398659137962453, "grad_norm": 0.04736328125, "learning_rate": 0.0021485370499697413, "loss": 1.1777, "step": 4949 }, { "epoch": 0.4340742831539991, "grad_norm": 0.06787109375, "learning_rate": 0.002148158158975792, "loss": 1.2048, "step": 4950 }, { "epoch": 0.4341619749283736, "grad_norm": 0.0546875, "learning_rate": 0.002147779222551105, "loss": 1.2105, "step": 4951 }, { "epoch": 0.43424966670274817, "grad_norm": 0.05224609375, "learning_rate": 0.002147400240730238, "loss": 1.2053, "step": 4952 }, { "epoch": 0.43433735847712274, "grad_norm": 0.052734375, "learning_rate": 0.0021470212135477527, "loss": 1.2209, "step": 4953 }, { "epoch": 0.43442505025149725, "grad_norm": 0.056396484375, "learning_rate": 0.002146642141038216, "loss": 1.2358, "step": 4954 }, { "epoch": 0.4345127420258718, "grad_norm": 0.06591796875, "learning_rate": 0.0021462630232361984, "loss": 1.182, "step": 4955 }, { "epoch": 0.4346004338002464, "grad_norm": 0.0634765625, "learning_rate": 0.0021458838601762734, "loss": 1.2404, "step": 4956 }, { "epoch": 0.4346881255746209, "grad_norm": 0.07666015625, "learning_rate": 0.0021455046518930212, "loss": 1.1782, "step": 4957 }, { "epoch": 0.43477581734899545, "grad_norm": 0.09619140625, "learning_rate": 0.002145125398421023, "loss": 1.2351, "step": 4958 }, { "epoch": 0.43486350912337, "grad_norm": 0.07373046875, "learning_rate": 0.002144746099794867, "loss": 1.2172, "step": 4959 }, { "epoch": 0.4349512008977445, "grad_norm": 0.08984375, "learning_rate": 0.0021443667560491447, "loss": 1.1805, "step": 4960 }, { "epoch": 0.4350388926721191, "grad_norm": 0.057373046875, "learning_rate": 0.00214398736721845, "loss": 1.1982, "step": 4961 }, { "epoch": 0.43512658444649366, "grad_norm": 0.072265625, "learning_rate": 0.0021436079333373823, "loss": 1.2351, "step": 4962 }, { "epoch": 0.43521427622086817, "grad_norm": 0.08349609375, "learning_rate": 0.002143228454440545, "loss": 1.2365, "step": 4963 }, { "epoch": 0.43530196799524273, "grad_norm": 0.0546875, "learning_rate": 0.002142848930562547, "loss": 1.1316, "step": 4964 }, { "epoch": 0.43538965976961724, "grad_norm": 0.1064453125, "learning_rate": 0.0021424693617379978, "loss": 1.2361, "step": 4965 }, { "epoch": 0.4354773515439918, "grad_norm": 0.048583984375, "learning_rate": 0.0021420897480015147, "loss": 1.2385, "step": 4966 }, { "epoch": 0.4355650433183664, "grad_norm": 0.14453125, "learning_rate": 0.0021417100893877166, "loss": 1.2583, "step": 4967 }, { "epoch": 0.4356527350927409, "grad_norm": 0.0556640625, "learning_rate": 0.002141330385931228, "loss": 1.201, "step": 4968 }, { "epoch": 0.43574042686711545, "grad_norm": 0.12060546875, "learning_rate": 0.0021409506376666764, "loss": 1.1987, "step": 4969 }, { "epoch": 0.43582811864149, "grad_norm": 0.07470703125, "learning_rate": 0.0021405708446286943, "loss": 1.1923, "step": 4970 }, { "epoch": 0.4359158104158645, "grad_norm": 0.07470703125, "learning_rate": 0.002140191006851917, "loss": 1.1894, "step": 4971 }, { "epoch": 0.4360035021902391, "grad_norm": 0.05322265625, "learning_rate": 0.0021398111243709854, "loss": 1.181, "step": 4972 }, { "epoch": 0.43609119396461365, "grad_norm": 0.05615234375, "learning_rate": 0.002139431197220544, "loss": 1.2429, "step": 4973 }, { "epoch": 0.43617888573898816, "grad_norm": 0.051025390625, "learning_rate": 0.0021390512254352407, "loss": 1.1917, "step": 4974 }, { "epoch": 0.43626657751336273, "grad_norm": 0.053466796875, "learning_rate": 0.0021386712090497285, "loss": 1.1559, "step": 4975 }, { "epoch": 0.43635426928773724, "grad_norm": 0.055908203125, "learning_rate": 0.0021382911480986627, "loss": 1.2571, "step": 4976 }, { "epoch": 0.4364419610621118, "grad_norm": 0.0625, "learning_rate": 0.002137911042616706, "loss": 1.2164, "step": 4977 }, { "epoch": 0.43652965283648637, "grad_norm": 0.05517578125, "learning_rate": 0.0021375308926385207, "loss": 1.2768, "step": 4978 }, { "epoch": 0.4366173446108609, "grad_norm": 0.055908203125, "learning_rate": 0.002137150698198777, "loss": 1.1486, "step": 4979 }, { "epoch": 0.43670503638523545, "grad_norm": 0.0625, "learning_rate": 0.0021367704593321466, "loss": 1.2355, "step": 4980 }, { "epoch": 0.43679272815961, "grad_norm": 0.053466796875, "learning_rate": 0.002136390176073307, "loss": 1.2068, "step": 4981 }, { "epoch": 0.4368804199339845, "grad_norm": 0.06689453125, "learning_rate": 0.0021360098484569396, "loss": 1.1927, "step": 4982 }, { "epoch": 0.4369681117083591, "grad_norm": 0.064453125, "learning_rate": 0.002135629476517728, "loss": 1.3049, "step": 4983 }, { "epoch": 0.43705580348273365, "grad_norm": 0.09326171875, "learning_rate": 0.0021352490602903626, "loss": 1.1372, "step": 4984 }, { "epoch": 0.43714349525710816, "grad_norm": 0.07958984375, "learning_rate": 0.0021348685998095348, "loss": 1.2266, "step": 4985 }, { "epoch": 0.4372311870314827, "grad_norm": 0.0732421875, "learning_rate": 0.002134488095109943, "loss": 1.2211, "step": 4986 }, { "epoch": 0.43731887880585724, "grad_norm": 0.08837890625, "learning_rate": 0.002134107546226287, "loss": 1.229, "step": 4987 }, { "epoch": 0.4374065705802318, "grad_norm": 0.08544921875, "learning_rate": 0.002133726953193273, "loss": 1.2322, "step": 4988 }, { "epoch": 0.43749426235460637, "grad_norm": 0.146484375, "learning_rate": 0.0021333463160456093, "loss": 1.1921, "step": 4989 }, { "epoch": 0.4375819541289809, "grad_norm": 0.08740234375, "learning_rate": 0.002132965634818009, "loss": 1.2398, "step": 4990 }, { "epoch": 0.43766964590335544, "grad_norm": 0.158203125, "learning_rate": 0.0021325849095451903, "loss": 1.2872, "step": 4991 }, { "epoch": 0.43775733767773, "grad_norm": 0.11962890625, "learning_rate": 0.0021322041402618733, "loss": 1.3158, "step": 4992 }, { "epoch": 0.4378450294521045, "grad_norm": 0.22265625, "learning_rate": 0.002131823327002784, "loss": 1.2775, "step": 4993 }, { "epoch": 0.4379327212264791, "grad_norm": 0.1181640625, "learning_rate": 0.0021314424698026502, "loss": 1.2184, "step": 4994 }, { "epoch": 0.43802041300085365, "grad_norm": 0.193359375, "learning_rate": 0.002131061568696207, "loss": 1.2423, "step": 4995 }, { "epoch": 0.43810810477522816, "grad_norm": 0.1630859375, "learning_rate": 0.0021306806237181893, "loss": 1.2185, "step": 4996 }, { "epoch": 0.4381957965496027, "grad_norm": 0.13671875, "learning_rate": 0.002130299634903341, "loss": 1.1971, "step": 4997 }, { "epoch": 0.43828348832397723, "grad_norm": 0.158203125, "learning_rate": 0.0021299186022864045, "loss": 1.2346, "step": 4998 }, { "epoch": 0.4383711800983518, "grad_norm": 0.07666015625, "learning_rate": 0.0021295375259021312, "loss": 1.1964, "step": 4999 }, { "epoch": 0.43845887187272636, "grad_norm": 0.146484375, "learning_rate": 0.002129156405785273, "loss": 1.1961, "step": 5000 }, { "epoch": 0.43845887187272636, "eval_loss": 1.2292165756225586, "eval_runtime": 429.1072, "eval_samples_per_second": 33.668, "eval_steps_per_second": 8.417, "step": 5000 }, { "epoch": 0.4385465636471009, "grad_norm": 0.10107421875, "learning_rate": 0.002128775241970588, "loss": 1.2518, "step": 5001 }, { "epoch": 0.43863425542147544, "grad_norm": 0.06982421875, "learning_rate": 0.002128394034492837, "loss": 1.1714, "step": 5002 }, { "epoch": 0.43872194719585, "grad_norm": 0.11962890625, "learning_rate": 0.0021280127833867846, "loss": 1.2292, "step": 5003 }, { "epoch": 0.4388096389702245, "grad_norm": 0.060546875, "learning_rate": 0.0021276314886872006, "loss": 1.1906, "step": 5004 }, { "epoch": 0.4388973307445991, "grad_norm": 0.10986328125, "learning_rate": 0.0021272501504288575, "loss": 1.2302, "step": 5005 }, { "epoch": 0.43898502251897364, "grad_norm": 0.12890625, "learning_rate": 0.0021268687686465328, "loss": 1.1844, "step": 5006 }, { "epoch": 0.43907271429334815, "grad_norm": 0.0771484375, "learning_rate": 0.002126487343375008, "loss": 1.1975, "step": 5007 }, { "epoch": 0.4391604060677227, "grad_norm": 0.09765625, "learning_rate": 0.002126105874649067, "loss": 1.1603, "step": 5008 }, { "epoch": 0.4392480978420973, "grad_norm": 0.09375, "learning_rate": 0.002125724362503499, "loss": 1.1937, "step": 5009 }, { "epoch": 0.4393357896164718, "grad_norm": 0.06591796875, "learning_rate": 0.002125342806973098, "loss": 1.2095, "step": 5010 }, { "epoch": 0.43942348139084636, "grad_norm": 0.103515625, "learning_rate": 0.0021249612080926593, "loss": 1.2814, "step": 5011 }, { "epoch": 0.43951117316522087, "grad_norm": 0.053466796875, "learning_rate": 0.0021245795658969852, "loss": 1.2037, "step": 5012 }, { "epoch": 0.43959886493959544, "grad_norm": 0.057373046875, "learning_rate": 0.00212419788042088, "loss": 1.2157, "step": 5013 }, { "epoch": 0.43968655671397, "grad_norm": 0.083984375, "learning_rate": 0.0021238161516991512, "loss": 1.2193, "step": 5014 }, { "epoch": 0.4397742484883445, "grad_norm": 0.05029296875, "learning_rate": 0.002123434379766613, "loss": 1.2784, "step": 5015 }, { "epoch": 0.4398619402627191, "grad_norm": 0.0712890625, "learning_rate": 0.0021230525646580814, "loss": 1.1988, "step": 5016 }, { "epoch": 0.43994963203709364, "grad_norm": 0.08642578125, "learning_rate": 0.0021226707064083772, "loss": 1.1917, "step": 5017 }, { "epoch": 0.44003732381146815, "grad_norm": 0.07373046875, "learning_rate": 0.002122288805052325, "loss": 1.3056, "step": 5018 }, { "epoch": 0.4401250155858427, "grad_norm": 0.08154296875, "learning_rate": 0.002121906860624753, "loss": 1.2018, "step": 5019 }, { "epoch": 0.4402127073602173, "grad_norm": 0.057861328125, "learning_rate": 0.0021215248731604936, "loss": 1.2025, "step": 5020 }, { "epoch": 0.4403003991345918, "grad_norm": 0.0703125, "learning_rate": 0.002121142842694383, "loss": 1.2334, "step": 5021 }, { "epoch": 0.44038809090896636, "grad_norm": 0.0849609375, "learning_rate": 0.002120760769261261, "loss": 1.1549, "step": 5022 }, { "epoch": 0.44047578268334087, "grad_norm": 0.05810546875, "learning_rate": 0.0021203786528959717, "loss": 1.2858, "step": 5023 }, { "epoch": 0.44056347445771543, "grad_norm": 0.10595703125, "learning_rate": 0.0021199964936333645, "loss": 1.1477, "step": 5024 }, { "epoch": 0.44065116623209, "grad_norm": 0.078125, "learning_rate": 0.00211961429150829, "loss": 1.1854, "step": 5025 }, { "epoch": 0.4407388580064645, "grad_norm": 0.0791015625, "learning_rate": 0.0021192320465556044, "loss": 1.2333, "step": 5026 }, { "epoch": 0.44082654978083907, "grad_norm": 0.0830078125, "learning_rate": 0.0021188497588101674, "loss": 1.2431, "step": 5027 }, { "epoch": 0.44091424155521364, "grad_norm": 0.078125, "learning_rate": 0.002118467428306843, "loss": 1.1783, "step": 5028 }, { "epoch": 0.44100193332958815, "grad_norm": 0.09228515625, "learning_rate": 0.0021180850550804983, "loss": 1.2757, "step": 5029 }, { "epoch": 0.4410896251039627, "grad_norm": 0.06689453125, "learning_rate": 0.0021177026391660044, "loss": 1.2159, "step": 5030 }, { "epoch": 0.4411773168783373, "grad_norm": 0.1904296875, "learning_rate": 0.002117320180598238, "loss": 1.2928, "step": 5031 }, { "epoch": 0.4412650086527118, "grad_norm": 0.060302734375, "learning_rate": 0.0021169376794120767, "loss": 1.2558, "step": 5032 }, { "epoch": 0.44135270042708635, "grad_norm": 0.1806640625, "learning_rate": 0.0021165551356424053, "loss": 1.2232, "step": 5033 }, { "epoch": 0.44144039220146086, "grad_norm": 0.07470703125, "learning_rate": 0.0021161725493241096, "loss": 1.2949, "step": 5034 }, { "epoch": 0.44152808397583543, "grad_norm": 0.1533203125, "learning_rate": 0.002115789920492081, "loss": 1.1882, "step": 5035 }, { "epoch": 0.44161577575021, "grad_norm": 0.09228515625, "learning_rate": 0.0021154072491812136, "loss": 1.2538, "step": 5036 }, { "epoch": 0.4417034675245845, "grad_norm": 0.11962890625, "learning_rate": 0.0021150245354264073, "loss": 1.167, "step": 5037 }, { "epoch": 0.44179115929895907, "grad_norm": 0.1328125, "learning_rate": 0.002114641779262563, "loss": 1.266, "step": 5038 }, { "epoch": 0.44187885107333363, "grad_norm": 0.09765625, "learning_rate": 0.002114258980724588, "loss": 1.2697, "step": 5039 }, { "epoch": 0.44196654284770814, "grad_norm": 0.10986328125, "learning_rate": 0.002113876139847393, "loss": 1.225, "step": 5040 }, { "epoch": 0.4420542346220827, "grad_norm": 0.08642578125, "learning_rate": 0.0021134932566658905, "loss": 1.2076, "step": 5041 }, { "epoch": 0.4421419263964573, "grad_norm": 0.07421875, "learning_rate": 0.002113110331215001, "loss": 1.2108, "step": 5042 }, { "epoch": 0.4422296181708318, "grad_norm": 0.0703125, "learning_rate": 0.0021127273635296435, "loss": 1.2553, "step": 5043 }, { "epoch": 0.44231730994520635, "grad_norm": 0.08154296875, "learning_rate": 0.0021123443536447454, "loss": 1.223, "step": 5044 }, { "epoch": 0.4424050017195809, "grad_norm": 0.06396484375, "learning_rate": 0.0021119613015952356, "loss": 1.1526, "step": 5045 }, { "epoch": 0.4424926934939554, "grad_norm": 0.07177734375, "learning_rate": 0.002111578207416048, "loss": 1.1351, "step": 5046 }, { "epoch": 0.44258038526833, "grad_norm": 0.07470703125, "learning_rate": 0.0021111950711421197, "loss": 1.2871, "step": 5047 }, { "epoch": 0.4426680770427045, "grad_norm": 0.0712890625, "learning_rate": 0.0021108118928083912, "loss": 1.2834, "step": 5048 }, { "epoch": 0.44275576881707907, "grad_norm": 0.11767578125, "learning_rate": 0.0021104286724498073, "loss": 1.2897, "step": 5049 }, { "epoch": 0.44284346059145363, "grad_norm": 0.05908203125, "learning_rate": 0.0021100454101013176, "loss": 1.1931, "step": 5050 }, { "epoch": 0.44293115236582814, "grad_norm": 0.12060546875, "learning_rate": 0.0021096621057978735, "loss": 1.2298, "step": 5051 }, { "epoch": 0.4430188441402027, "grad_norm": 0.095703125, "learning_rate": 0.002109278759574432, "loss": 1.1803, "step": 5052 }, { "epoch": 0.44310653591457727, "grad_norm": 0.1298828125, "learning_rate": 0.002108895371465954, "loss": 1.1596, "step": 5053 }, { "epoch": 0.4431942276889518, "grad_norm": 0.12353515625, "learning_rate": 0.002108511941507402, "loss": 1.2019, "step": 5054 }, { "epoch": 0.44328191946332635, "grad_norm": 0.060546875, "learning_rate": 0.002108128469733745, "loss": 1.2297, "step": 5055 }, { "epoch": 0.4433696112377009, "grad_norm": 0.0927734375, "learning_rate": 0.0021077449561799537, "loss": 1.2349, "step": 5056 }, { "epoch": 0.4434573030120754, "grad_norm": 0.1123046875, "learning_rate": 0.002107361400881005, "loss": 1.2346, "step": 5057 }, { "epoch": 0.44354499478645, "grad_norm": 0.08251953125, "learning_rate": 0.0021069778038718765, "loss": 1.2647, "step": 5058 }, { "epoch": 0.4436326865608245, "grad_norm": 0.11376953125, "learning_rate": 0.002106594165187552, "loss": 1.1943, "step": 5059 }, { "epoch": 0.44372037833519906, "grad_norm": 0.08837890625, "learning_rate": 0.002106210484863018, "loss": 1.1937, "step": 5060 }, { "epoch": 0.4438080701095736, "grad_norm": 0.1171875, "learning_rate": 0.0021058267629332664, "loss": 1.2664, "step": 5061 }, { "epoch": 0.44389576188394814, "grad_norm": 0.08740234375, "learning_rate": 0.00210544299943329, "loss": 1.2507, "step": 5062 }, { "epoch": 0.4439834536583227, "grad_norm": 0.0986328125, "learning_rate": 0.002105059194398088, "loss": 1.2176, "step": 5063 }, { "epoch": 0.44407114543269727, "grad_norm": 0.061279296875, "learning_rate": 0.0021046753478626626, "loss": 1.2586, "step": 5064 }, { "epoch": 0.4441588372070718, "grad_norm": 0.076171875, "learning_rate": 0.002104291459862018, "loss": 1.1863, "step": 5065 }, { "epoch": 0.44424652898144634, "grad_norm": 0.11181640625, "learning_rate": 0.0021039075304311664, "loss": 1.2505, "step": 5066 }, { "epoch": 0.4443342207558209, "grad_norm": 0.0654296875, "learning_rate": 0.002103523559605119, "loss": 1.2709, "step": 5067 }, { "epoch": 0.4444219125301954, "grad_norm": 0.06396484375, "learning_rate": 0.002103139547418894, "loss": 1.1805, "step": 5068 }, { "epoch": 0.44450960430457, "grad_norm": 0.05908203125, "learning_rate": 0.002102755493907512, "loss": 1.2116, "step": 5069 }, { "epoch": 0.4445972960789445, "grad_norm": 0.0859375, "learning_rate": 0.002102371399105998, "loss": 1.2745, "step": 5070 }, { "epoch": 0.44468498785331906, "grad_norm": 0.054443359375, "learning_rate": 0.00210198726304938, "loss": 1.1986, "step": 5071 }, { "epoch": 0.4447726796276936, "grad_norm": 0.068359375, "learning_rate": 0.0021016030857726902, "loss": 1.183, "step": 5072 }, { "epoch": 0.44486037140206813, "grad_norm": 0.072265625, "learning_rate": 0.002101218867310965, "loss": 1.1632, "step": 5073 }, { "epoch": 0.4449480631764427, "grad_norm": 0.058349609375, "learning_rate": 0.0021008346076992436, "loss": 1.1854, "step": 5074 }, { "epoch": 0.44503575495081726, "grad_norm": 0.06005859375, "learning_rate": 0.0021004503069725704, "loss": 1.1938, "step": 5075 }, { "epoch": 0.4451234467251918, "grad_norm": 0.064453125, "learning_rate": 0.0021000659651659913, "loss": 1.1867, "step": 5076 }, { "epoch": 0.44521113849956634, "grad_norm": 0.057373046875, "learning_rate": 0.0020996815823145585, "loss": 1.1982, "step": 5077 }, { "epoch": 0.4452988302739409, "grad_norm": 0.060546875, "learning_rate": 0.002099297158453326, "loss": 1.2776, "step": 5078 }, { "epoch": 0.4453865220483154, "grad_norm": 0.06787109375, "learning_rate": 0.0020989126936173524, "loss": 1.226, "step": 5079 }, { "epoch": 0.44547421382269, "grad_norm": 0.06005859375, "learning_rate": 0.0020985281878417006, "loss": 1.2431, "step": 5080 }, { "epoch": 0.44556190559706454, "grad_norm": 0.0556640625, "learning_rate": 0.002098143641161436, "loss": 1.2413, "step": 5081 }, { "epoch": 0.44564959737143905, "grad_norm": 0.1484375, "learning_rate": 0.002097759053611627, "loss": 1.2522, "step": 5082 }, { "epoch": 0.4457372891458136, "grad_norm": 0.0673828125, "learning_rate": 0.0020973744252273494, "loss": 1.2209, "step": 5083 }, { "epoch": 0.44582498092018813, "grad_norm": 0.17578125, "learning_rate": 0.0020969897560436787, "loss": 1.2822, "step": 5084 }, { "epoch": 0.4459126726945627, "grad_norm": 0.054931640625, "learning_rate": 0.002096605046095696, "loss": 1.2488, "step": 5085 }, { "epoch": 0.44600036446893726, "grad_norm": 0.0712890625, "learning_rate": 0.0020962202954184863, "loss": 1.1945, "step": 5086 }, { "epoch": 0.44608805624331177, "grad_norm": 0.103515625, "learning_rate": 0.002095835504047137, "loss": 1.2157, "step": 5087 }, { "epoch": 0.44617574801768634, "grad_norm": 0.060302734375, "learning_rate": 0.0020954506720167417, "loss": 1.2116, "step": 5088 }, { "epoch": 0.4462634397920609, "grad_norm": 0.1220703125, "learning_rate": 0.002095065799362394, "loss": 1.1995, "step": 5089 }, { "epoch": 0.4463511315664354, "grad_norm": 0.06591796875, "learning_rate": 0.0020946808861191948, "loss": 1.1729, "step": 5090 }, { "epoch": 0.44643882334081, "grad_norm": 0.125, "learning_rate": 0.002094295932322247, "loss": 1.2142, "step": 5091 }, { "epoch": 0.44652651511518454, "grad_norm": 0.09375, "learning_rate": 0.0020939109380066568, "loss": 1.2338, "step": 5092 }, { "epoch": 0.44661420688955905, "grad_norm": 0.0634765625, "learning_rate": 0.0020935259032075345, "loss": 1.156, "step": 5093 }, { "epoch": 0.4467018986639336, "grad_norm": 0.06884765625, "learning_rate": 0.002093140827959995, "loss": 1.1757, "step": 5094 }, { "epoch": 0.4467895904383081, "grad_norm": 0.05419921875, "learning_rate": 0.0020927557122991563, "loss": 1.2242, "step": 5095 }, { "epoch": 0.4468772822126827, "grad_norm": 0.06884765625, "learning_rate": 0.00209237055626014, "loss": 1.1617, "step": 5096 }, { "epoch": 0.44696497398705726, "grad_norm": 0.07666015625, "learning_rate": 0.0020919853598780703, "loss": 1.1576, "step": 5097 }, { "epoch": 0.44705266576143177, "grad_norm": 0.055908203125, "learning_rate": 0.002091600123188077, "loss": 1.2475, "step": 5098 }, { "epoch": 0.44714035753580633, "grad_norm": 0.05908203125, "learning_rate": 0.002091214846225292, "loss": 1.2266, "step": 5099 }, { "epoch": 0.4472280493101809, "grad_norm": 0.08154296875, "learning_rate": 0.0020908295290248528, "loss": 1.2213, "step": 5100 }, { "epoch": 0.4473157410845554, "grad_norm": 0.12353515625, "learning_rate": 0.002090444171621898, "loss": 1.2706, "step": 5101 }, { "epoch": 0.44740343285893, "grad_norm": 0.078125, "learning_rate": 0.0020900587740515716, "loss": 1.2073, "step": 5102 }, { "epoch": 0.44749112463330454, "grad_norm": 0.0634765625, "learning_rate": 0.0020896733363490218, "loss": 1.2098, "step": 5103 }, { "epoch": 0.44757881640767905, "grad_norm": 0.06298828125, "learning_rate": 0.0020892878585493974, "loss": 1.2815, "step": 5104 }, { "epoch": 0.4476665081820536, "grad_norm": 0.1064453125, "learning_rate": 0.0020889023406878557, "loss": 1.2487, "step": 5105 }, { "epoch": 0.4477541999564281, "grad_norm": 0.06982421875, "learning_rate": 0.002088516782799553, "loss": 1.29, "step": 5106 }, { "epoch": 0.4478418917308027, "grad_norm": 0.068359375, "learning_rate": 0.0020881311849196517, "loss": 1.2537, "step": 5107 }, { "epoch": 0.44792958350517725, "grad_norm": 0.06640625, "learning_rate": 0.0020877455470833177, "loss": 1.2316, "step": 5108 }, { "epoch": 0.44801727527955176, "grad_norm": 0.0615234375, "learning_rate": 0.002087359869325719, "loss": 1.2814, "step": 5109 }, { "epoch": 0.44810496705392633, "grad_norm": 0.05712890625, "learning_rate": 0.00208697415168203, "loss": 1.2524, "step": 5110 }, { "epoch": 0.4481926588283009, "grad_norm": 0.07275390625, "learning_rate": 0.0020865883941874257, "loss": 1.1533, "step": 5111 }, { "epoch": 0.4482803506026754, "grad_norm": 0.054443359375, "learning_rate": 0.002086202596877088, "loss": 1.1845, "step": 5112 }, { "epoch": 0.44836804237704997, "grad_norm": 0.06396484375, "learning_rate": 0.0020858167597861987, "loss": 1.2226, "step": 5113 }, { "epoch": 0.44845573415142453, "grad_norm": 0.0810546875, "learning_rate": 0.0020854308829499463, "loss": 1.2619, "step": 5114 }, { "epoch": 0.44854342592579904, "grad_norm": 0.05859375, "learning_rate": 0.0020850449664035213, "loss": 1.1599, "step": 5115 }, { "epoch": 0.4486311177001736, "grad_norm": 0.09619140625, "learning_rate": 0.002084659010182118, "loss": 1.2593, "step": 5116 }, { "epoch": 0.4487188094745482, "grad_norm": 0.08984375, "learning_rate": 0.0020842730143209357, "loss": 1.253, "step": 5117 }, { "epoch": 0.4488065012489227, "grad_norm": 0.053955078125, "learning_rate": 0.0020838869788551754, "loss": 1.1648, "step": 5118 }, { "epoch": 0.44889419302329725, "grad_norm": 0.0732421875, "learning_rate": 0.0020835009038200425, "loss": 1.2086, "step": 5119 }, { "epoch": 0.44898188479767176, "grad_norm": 0.052978515625, "learning_rate": 0.002083114789250746, "loss": 1.1788, "step": 5120 }, { "epoch": 0.4490695765720463, "grad_norm": 0.058349609375, "learning_rate": 0.0020827286351824997, "loss": 1.1511, "step": 5121 }, { "epoch": 0.4491572683464209, "grad_norm": 0.06494140625, "learning_rate": 0.0020823424416505187, "loss": 1.1482, "step": 5122 }, { "epoch": 0.4492449601207954, "grad_norm": 0.068359375, "learning_rate": 0.002081956208690022, "loss": 1.1585, "step": 5123 }, { "epoch": 0.44933265189516997, "grad_norm": 0.09130859375, "learning_rate": 0.0020815699363362357, "loss": 1.2194, "step": 5124 }, { "epoch": 0.44942034366954453, "grad_norm": 0.06103515625, "learning_rate": 0.0020811836246243842, "loss": 1.2303, "step": 5125 }, { "epoch": 0.44950803544391904, "grad_norm": 0.09619140625, "learning_rate": 0.0020807972735896996, "loss": 1.2527, "step": 5126 }, { "epoch": 0.4495957272182936, "grad_norm": 0.055908203125, "learning_rate": 0.0020804108832674162, "loss": 1.2553, "step": 5127 }, { "epoch": 0.44968341899266817, "grad_norm": 0.11474609375, "learning_rate": 0.0020800244536927706, "loss": 1.1848, "step": 5128 }, { "epoch": 0.4497711107670427, "grad_norm": 0.06982421875, "learning_rate": 0.0020796379849010054, "loss": 1.2001, "step": 5129 }, { "epoch": 0.44985880254141725, "grad_norm": 0.12353515625, "learning_rate": 0.002079251476927365, "loss": 1.1786, "step": 5130 }, { "epoch": 0.44994649431579176, "grad_norm": 0.109375, "learning_rate": 0.0020788649298070978, "loss": 1.2076, "step": 5131 }, { "epoch": 0.4500341860901663, "grad_norm": 0.1591796875, "learning_rate": 0.002078478343575456, "loss": 1.2007, "step": 5132 }, { "epoch": 0.4501218778645409, "grad_norm": 0.1220703125, "learning_rate": 0.0020780917182676955, "loss": 1.2647, "step": 5133 }, { "epoch": 0.4502095696389154, "grad_norm": 0.162109375, "learning_rate": 0.002077705053919076, "loss": 1.2293, "step": 5134 }, { "epoch": 0.45029726141328996, "grad_norm": 0.1376953125, "learning_rate": 0.002077318350564859, "loss": 1.2205, "step": 5135 }, { "epoch": 0.4503849531876645, "grad_norm": 0.07763671875, "learning_rate": 0.002076931608240312, "loss": 1.2414, "step": 5136 }, { "epoch": 0.45047264496203904, "grad_norm": 0.10205078125, "learning_rate": 0.002076544826980704, "loss": 1.2049, "step": 5137 }, { "epoch": 0.4505603367364136, "grad_norm": 0.072265625, "learning_rate": 0.002076158006821309, "loss": 1.1605, "step": 5138 }, { "epoch": 0.45064802851078817, "grad_norm": 0.080078125, "learning_rate": 0.0020757711477974046, "loss": 1.2569, "step": 5139 }, { "epoch": 0.4507357202851627, "grad_norm": 0.055419921875, "learning_rate": 0.0020753842499442697, "loss": 1.2786, "step": 5140 }, { "epoch": 0.45082341205953724, "grad_norm": 0.07763671875, "learning_rate": 0.0020749973132971897, "loss": 1.2143, "step": 5141 }, { "epoch": 0.45091110383391175, "grad_norm": 0.052734375, "learning_rate": 0.002074610337891452, "loss": 1.1579, "step": 5142 }, { "epoch": 0.4509987956082863, "grad_norm": 0.083984375, "learning_rate": 0.0020742233237623473, "loss": 1.1752, "step": 5143 }, { "epoch": 0.4510864873826609, "grad_norm": 0.057861328125, "learning_rate": 0.0020738362709451707, "loss": 1.2108, "step": 5144 }, { "epoch": 0.4511741791570354, "grad_norm": 0.076171875, "learning_rate": 0.002073449179475221, "loss": 1.2039, "step": 5145 }, { "epoch": 0.45126187093140996, "grad_norm": 0.058837890625, "learning_rate": 0.0020730620493877978, "loss": 1.2062, "step": 5146 }, { "epoch": 0.4513495627057845, "grad_norm": 0.07080078125, "learning_rate": 0.002072674880718209, "loss": 1.2344, "step": 5147 }, { "epoch": 0.45143725448015903, "grad_norm": 0.055908203125, "learning_rate": 0.0020722876735017617, "loss": 1.1818, "step": 5148 }, { "epoch": 0.4515249462545336, "grad_norm": 0.099609375, "learning_rate": 0.002071900427773769, "loss": 1.2833, "step": 5149 }, { "epoch": 0.45161263802890816, "grad_norm": 0.0654296875, "learning_rate": 0.0020715131435695465, "loss": 1.2034, "step": 5150 }, { "epoch": 0.4517003298032827, "grad_norm": 0.083984375, "learning_rate": 0.0020711258209244127, "loss": 1.1719, "step": 5151 }, { "epoch": 0.45178802157765724, "grad_norm": 0.060546875, "learning_rate": 0.002070738459873692, "loss": 1.1998, "step": 5152 }, { "epoch": 0.4518757133520318, "grad_norm": 0.052001953125, "learning_rate": 0.0020703510604527095, "loss": 1.1755, "step": 5153 }, { "epoch": 0.4519634051264063, "grad_norm": 0.06591796875, "learning_rate": 0.0020699636226967954, "loss": 1.1913, "step": 5154 }, { "epoch": 0.4520510969007809, "grad_norm": 0.052978515625, "learning_rate": 0.002069576146641283, "loss": 1.2001, "step": 5155 }, { "epoch": 0.4521387886751554, "grad_norm": 0.061767578125, "learning_rate": 0.0020691886323215094, "loss": 1.2245, "step": 5156 }, { "epoch": 0.45222648044952996, "grad_norm": 0.06298828125, "learning_rate": 0.002068801079772815, "loss": 1.2008, "step": 5157 }, { "epoch": 0.4523141722239045, "grad_norm": 0.061279296875, "learning_rate": 0.0020684134890305425, "loss": 1.2585, "step": 5158 }, { "epoch": 0.45240186399827903, "grad_norm": 0.0771484375, "learning_rate": 0.00206802586013004, "loss": 1.242, "step": 5159 }, { "epoch": 0.4524895557726536, "grad_norm": 0.08154296875, "learning_rate": 0.0020676381931066584, "loss": 1.2455, "step": 5160 }, { "epoch": 0.45257724754702816, "grad_norm": 0.08349609375, "learning_rate": 0.002067250487995752, "loss": 1.2081, "step": 5161 }, { "epoch": 0.45266493932140267, "grad_norm": 0.062255859375, "learning_rate": 0.0020668627448326778, "loss": 1.2285, "step": 5162 }, { "epoch": 0.45275263109577724, "grad_norm": 0.08642578125, "learning_rate": 0.0020664749636527973, "loss": 1.2195, "step": 5163 }, { "epoch": 0.4528403228701518, "grad_norm": 0.0771484375, "learning_rate": 0.0020660871444914764, "loss": 1.2171, "step": 5164 }, { "epoch": 0.4529280146445263, "grad_norm": 0.06787109375, "learning_rate": 0.0020656992873840803, "loss": 1.2546, "step": 5165 }, { "epoch": 0.4530157064189009, "grad_norm": 0.060546875, "learning_rate": 0.0020653113923659837, "loss": 1.2479, "step": 5166 }, { "epoch": 0.4531033981932754, "grad_norm": 0.0791015625, "learning_rate": 0.0020649234594725596, "loss": 1.2058, "step": 5167 }, { "epoch": 0.45319108996764995, "grad_norm": 0.07373046875, "learning_rate": 0.0020645354887391873, "loss": 1.1995, "step": 5168 }, { "epoch": 0.4532787817420245, "grad_norm": 0.115234375, "learning_rate": 0.0020641474802012482, "loss": 1.2566, "step": 5169 }, { "epoch": 0.453366473516399, "grad_norm": 0.0625, "learning_rate": 0.0020637594338941286, "loss": 1.2026, "step": 5170 }, { "epoch": 0.4534541652907736, "grad_norm": 0.1298828125, "learning_rate": 0.002063371349853216, "loss": 1.1348, "step": 5171 }, { "epoch": 0.45354185706514816, "grad_norm": 0.07275390625, "learning_rate": 0.002062983228113904, "loss": 1.2273, "step": 5172 }, { "epoch": 0.45362954883952267, "grad_norm": 0.119140625, "learning_rate": 0.0020625950687115876, "loss": 1.2069, "step": 5173 }, { "epoch": 0.45371724061389723, "grad_norm": 0.062255859375, "learning_rate": 0.002062206871681666, "loss": 1.1835, "step": 5174 }, { "epoch": 0.4538049323882718, "grad_norm": 0.0712890625, "learning_rate": 0.002061818637059542, "loss": 1.1715, "step": 5175 }, { "epoch": 0.4538926241626463, "grad_norm": 0.05615234375, "learning_rate": 0.002061430364880621, "loss": 1.2171, "step": 5176 }, { "epoch": 0.4539803159370209, "grad_norm": 0.078125, "learning_rate": 0.002061042055180313, "loss": 1.2793, "step": 5177 }, { "epoch": 0.4540680077113954, "grad_norm": 0.1123046875, "learning_rate": 0.0020606537079940306, "loss": 1.2094, "step": 5178 }, { "epoch": 0.45415569948576995, "grad_norm": 0.07666015625, "learning_rate": 0.00206026532335719, "loss": 1.208, "step": 5179 }, { "epoch": 0.4542433912601445, "grad_norm": 0.06396484375, "learning_rate": 0.002059876901305211, "loss": 1.1985, "step": 5180 }, { "epoch": 0.454331083034519, "grad_norm": 0.05419921875, "learning_rate": 0.0020594884418735173, "loss": 1.2342, "step": 5181 }, { "epoch": 0.4544187748088936, "grad_norm": 0.064453125, "learning_rate": 0.0020590999450975345, "loss": 1.1612, "step": 5182 }, { "epoch": 0.45450646658326815, "grad_norm": 0.07958984375, "learning_rate": 0.0020587114110126926, "loss": 1.2779, "step": 5183 }, { "epoch": 0.45459415835764266, "grad_norm": 0.060546875, "learning_rate": 0.0020583228396544254, "loss": 1.2161, "step": 5184 }, { "epoch": 0.45468185013201723, "grad_norm": 0.06591796875, "learning_rate": 0.002057934231058169, "loss": 1.2493, "step": 5185 }, { "epoch": 0.4547695419063918, "grad_norm": 0.06640625, "learning_rate": 0.002057545585259364, "loss": 1.2343, "step": 5186 }, { "epoch": 0.4548572336807663, "grad_norm": 0.0556640625, "learning_rate": 0.0020571569022934536, "loss": 1.142, "step": 5187 }, { "epoch": 0.45494492545514087, "grad_norm": 0.057373046875, "learning_rate": 0.0020567681821958843, "loss": 1.1968, "step": 5188 }, { "epoch": 0.45503261722951543, "grad_norm": 0.1455078125, "learning_rate": 0.0020563794250021074, "loss": 1.2221, "step": 5189 }, { "epoch": 0.45512030900388994, "grad_norm": 0.054443359375, "learning_rate": 0.0020559906307475767, "loss": 1.2018, "step": 5190 }, { "epoch": 0.4552080007782645, "grad_norm": 0.1669921875, "learning_rate": 0.0020556017994677474, "loss": 1.2203, "step": 5191 }, { "epoch": 0.455295692552639, "grad_norm": 0.0654296875, "learning_rate": 0.002055212931198081, "loss": 1.2265, "step": 5192 }, { "epoch": 0.4553833843270136, "grad_norm": 0.087890625, "learning_rate": 0.0020548240259740417, "loss": 1.2327, "step": 5193 }, { "epoch": 0.45547107610138815, "grad_norm": 0.055419921875, "learning_rate": 0.0020544350838310964, "loss": 1.2877, "step": 5194 }, { "epoch": 0.45555876787576266, "grad_norm": 0.0615234375, "learning_rate": 0.002054046104804715, "loss": 1.261, "step": 5195 }, { "epoch": 0.4556464596501372, "grad_norm": 0.0625, "learning_rate": 0.0020536570889303726, "loss": 1.2516, "step": 5196 }, { "epoch": 0.4557341514245118, "grad_norm": 0.06591796875, "learning_rate": 0.0020532680362435455, "loss": 1.2434, "step": 5197 }, { "epoch": 0.4558218431988863, "grad_norm": 0.0673828125, "learning_rate": 0.0020528789467797147, "loss": 1.2016, "step": 5198 }, { "epoch": 0.45590953497326087, "grad_norm": 0.07373046875, "learning_rate": 0.002052489820574364, "loss": 1.192, "step": 5199 }, { "epoch": 0.45599722674763543, "grad_norm": 0.062255859375, "learning_rate": 0.00205210065766298, "loss": 1.1508, "step": 5200 }, { "epoch": 0.45608491852200994, "grad_norm": 0.06640625, "learning_rate": 0.0020517114580810543, "loss": 1.139, "step": 5201 }, { "epoch": 0.4561726102963845, "grad_norm": 0.08544921875, "learning_rate": 0.0020513222218640813, "loss": 1.2424, "step": 5202 }, { "epoch": 0.456260302070759, "grad_norm": 0.0576171875, "learning_rate": 0.002050932949047557, "loss": 1.2428, "step": 5203 }, { "epoch": 0.4563479938451336, "grad_norm": 0.08740234375, "learning_rate": 0.0020505436396669827, "loss": 1.2475, "step": 5204 }, { "epoch": 0.45643568561950815, "grad_norm": 0.054931640625, "learning_rate": 0.002050154293757863, "loss": 1.1788, "step": 5205 }, { "epoch": 0.45652337739388266, "grad_norm": 0.07275390625, "learning_rate": 0.002049764911355704, "loss": 1.1909, "step": 5206 }, { "epoch": 0.4566110691682572, "grad_norm": 0.060302734375, "learning_rate": 0.0020493754924960175, "loss": 1.2608, "step": 5207 }, { "epoch": 0.4566987609426318, "grad_norm": 0.06884765625, "learning_rate": 0.002048986037214317, "loss": 1.1491, "step": 5208 }, { "epoch": 0.4567864527170063, "grad_norm": 0.06201171875, "learning_rate": 0.0020485965455461197, "loss": 1.1984, "step": 5209 }, { "epoch": 0.45687414449138086, "grad_norm": 0.05224609375, "learning_rate": 0.0020482070175269467, "loss": 1.2296, "step": 5210 }, { "epoch": 0.45696183626575543, "grad_norm": 0.060302734375, "learning_rate": 0.0020478174531923213, "loss": 1.224, "step": 5211 }, { "epoch": 0.45704952804012994, "grad_norm": 0.07275390625, "learning_rate": 0.002047427852577772, "loss": 1.1921, "step": 5212 }, { "epoch": 0.4571372198145045, "grad_norm": 0.083984375, "learning_rate": 0.0020470382157188275, "loss": 1.2069, "step": 5213 }, { "epoch": 0.457224911588879, "grad_norm": 0.07373046875, "learning_rate": 0.0020466485426510234, "loss": 1.182, "step": 5214 }, { "epoch": 0.4573126033632536, "grad_norm": 0.0771484375, "learning_rate": 0.002046258833409896, "loss": 1.1648, "step": 5215 }, { "epoch": 0.45740029513762814, "grad_norm": 0.09521484375, "learning_rate": 0.0020458690880309854, "loss": 1.314, "step": 5216 }, { "epoch": 0.45748798691200265, "grad_norm": 0.06494140625, "learning_rate": 0.002045479306549836, "loss": 1.2894, "step": 5217 }, { "epoch": 0.4575756786863772, "grad_norm": 0.060546875, "learning_rate": 0.002045089489001995, "loss": 1.2791, "step": 5218 }, { "epoch": 0.4576633704607518, "grad_norm": 0.1015625, "learning_rate": 0.002044699635423013, "loss": 1.215, "step": 5219 }, { "epoch": 0.4577510622351263, "grad_norm": 0.05908203125, "learning_rate": 0.0020443097458484427, "loss": 1.2419, "step": 5220 }, { "epoch": 0.45783875400950086, "grad_norm": 0.06005859375, "learning_rate": 0.0020439198203138415, "loss": 1.2335, "step": 5221 }, { "epoch": 0.4579264457838754, "grad_norm": 0.06494140625, "learning_rate": 0.0020435298588547695, "loss": 1.1946, "step": 5222 }, { "epoch": 0.45801413755824993, "grad_norm": 0.048095703125, "learning_rate": 0.0020431398615067906, "loss": 1.2183, "step": 5223 }, { "epoch": 0.4581018293326245, "grad_norm": 0.07470703125, "learning_rate": 0.0020427498283054706, "loss": 1.2158, "step": 5224 }, { "epoch": 0.458189521106999, "grad_norm": 0.058837890625, "learning_rate": 0.002042359759286381, "loss": 1.1895, "step": 5225 }, { "epoch": 0.4582772128813736, "grad_norm": 0.087890625, "learning_rate": 0.0020419696544850935, "loss": 1.2523, "step": 5226 }, { "epoch": 0.45836490465574814, "grad_norm": 0.08154296875, "learning_rate": 0.0020415795139371857, "loss": 1.1819, "step": 5227 }, { "epoch": 0.45845259643012265, "grad_norm": 0.057373046875, "learning_rate": 0.0020411893376782366, "loss": 1.1826, "step": 5228 }, { "epoch": 0.4585402882044972, "grad_norm": 0.0810546875, "learning_rate": 0.00204079912574383, "loss": 1.2216, "step": 5229 }, { "epoch": 0.4586279799788718, "grad_norm": 0.078125, "learning_rate": 0.0020404088781695523, "loss": 1.3002, "step": 5230 }, { "epoch": 0.4587156717532463, "grad_norm": 0.07568359375, "learning_rate": 0.0020400185949909925, "loss": 1.268, "step": 5231 }, { "epoch": 0.45880336352762086, "grad_norm": 0.058349609375, "learning_rate": 0.002039628276243744, "loss": 1.2036, "step": 5232 }, { "epoch": 0.4588910553019954, "grad_norm": 0.064453125, "learning_rate": 0.002039237921963402, "loss": 1.2101, "step": 5233 }, { "epoch": 0.45897874707636993, "grad_norm": 0.1005859375, "learning_rate": 0.0020388475321855668, "loss": 1.2491, "step": 5234 }, { "epoch": 0.4590664388507445, "grad_norm": 0.053466796875, "learning_rate": 0.0020384571069458402, "loss": 1.2173, "step": 5235 }, { "epoch": 0.45915413062511906, "grad_norm": 0.099609375, "learning_rate": 0.002038066646279828, "loss": 1.2224, "step": 5236 }, { "epoch": 0.45924182239949357, "grad_norm": 0.06982421875, "learning_rate": 0.00203767615022314, "loss": 1.2058, "step": 5237 }, { "epoch": 0.45932951417386814, "grad_norm": 0.1416015625, "learning_rate": 0.0020372856188113878, "loss": 1.2194, "step": 5238 }, { "epoch": 0.45941720594824265, "grad_norm": 0.058349609375, "learning_rate": 0.0020368950520801865, "loss": 1.1946, "step": 5239 }, { "epoch": 0.4595048977226172, "grad_norm": 0.07373046875, "learning_rate": 0.0020365044500651555, "loss": 1.2024, "step": 5240 }, { "epoch": 0.4595925894969918, "grad_norm": 0.08642578125, "learning_rate": 0.0020361138128019165, "loss": 1.2052, "step": 5241 }, { "epoch": 0.4596802812713663, "grad_norm": 0.076171875, "learning_rate": 0.002035723140326094, "loss": 1.2265, "step": 5242 }, { "epoch": 0.45976797304574085, "grad_norm": 0.0849609375, "learning_rate": 0.002035332432673318, "loss": 1.1668, "step": 5243 }, { "epoch": 0.4598556648201154, "grad_norm": 0.054443359375, "learning_rate": 0.0020349416898792177, "loss": 1.2934, "step": 5244 }, { "epoch": 0.4599433565944899, "grad_norm": 0.181640625, "learning_rate": 0.0020345509119794295, "loss": 1.2213, "step": 5245 }, { "epoch": 0.4600310483688645, "grad_norm": 0.057373046875, "learning_rate": 0.0020341600990095906, "loss": 1.2676, "step": 5246 }, { "epoch": 0.46011874014323906, "grad_norm": 0.1298828125, "learning_rate": 0.0020337692510053428, "loss": 1.1873, "step": 5247 }, { "epoch": 0.46020643191761357, "grad_norm": 0.0732421875, "learning_rate": 0.0020333783680023293, "loss": 1.1854, "step": 5248 }, { "epoch": 0.46029412369198813, "grad_norm": 0.09033203125, "learning_rate": 0.0020329874500361993, "loss": 1.2629, "step": 5249 }, { "epoch": 0.46038181546636264, "grad_norm": 0.06884765625, "learning_rate": 0.002032596497142602, "loss": 1.2275, "step": 5250 }, { "epoch": 0.4604695072407372, "grad_norm": 0.08056640625, "learning_rate": 0.002032205509357192, "loss": 1.2154, "step": 5251 }, { "epoch": 0.4605571990151118, "grad_norm": 0.0703125, "learning_rate": 0.002031814486715626, "loss": 1.2088, "step": 5252 }, { "epoch": 0.4606448907894863, "grad_norm": 0.068359375, "learning_rate": 0.002031423429253564, "loss": 1.2498, "step": 5253 }, { "epoch": 0.46073258256386085, "grad_norm": 0.07958984375, "learning_rate": 0.0020310323370066707, "loss": 1.2239, "step": 5254 }, { "epoch": 0.4608202743382354, "grad_norm": 0.06103515625, "learning_rate": 0.0020306412100106115, "loss": 1.1558, "step": 5255 }, { "epoch": 0.4609079661126099, "grad_norm": 0.06396484375, "learning_rate": 0.0020302500483010567, "loss": 1.2117, "step": 5256 }, { "epoch": 0.4609956578869845, "grad_norm": 0.057861328125, "learning_rate": 0.002029858851913679, "loss": 1.1848, "step": 5257 }, { "epoch": 0.46108334966135905, "grad_norm": 0.055908203125, "learning_rate": 0.002029467620884155, "loss": 1.2145, "step": 5258 }, { "epoch": 0.46117104143573356, "grad_norm": 0.068359375, "learning_rate": 0.002029076355248163, "loss": 1.2422, "step": 5259 }, { "epoch": 0.46125873321010813, "grad_norm": 0.07421875, "learning_rate": 0.002028685055041386, "loss": 1.1608, "step": 5260 }, { "epoch": 0.46134642498448264, "grad_norm": 0.07568359375, "learning_rate": 0.0020282937202995097, "loss": 1.1599, "step": 5261 }, { "epoch": 0.4614341167588572, "grad_norm": 0.061767578125, "learning_rate": 0.002027902351058223, "loss": 1.2362, "step": 5262 }, { "epoch": 0.46152180853323177, "grad_norm": 0.07373046875, "learning_rate": 0.0020275109473532173, "loss": 1.2175, "step": 5263 }, { "epoch": 0.4616095003076063, "grad_norm": 0.06982421875, "learning_rate": 0.0020271195092201873, "loss": 1.213, "step": 5264 }, { "epoch": 0.46169719208198085, "grad_norm": 0.0791015625, "learning_rate": 0.0020267280366948323, "loss": 1.2681, "step": 5265 }, { "epoch": 0.4617848838563554, "grad_norm": 0.07421875, "learning_rate": 0.002026336529812852, "loss": 1.2085, "step": 5266 }, { "epoch": 0.4618725756307299, "grad_norm": 0.0654296875, "learning_rate": 0.0020259449886099526, "loss": 1.1721, "step": 5267 }, { "epoch": 0.4619602674051045, "grad_norm": 0.06494140625, "learning_rate": 0.0020255534131218404, "loss": 1.1856, "step": 5268 }, { "epoch": 0.46204795917947905, "grad_norm": 0.054443359375, "learning_rate": 0.002025161803384226, "loss": 1.1916, "step": 5269 }, { "epoch": 0.46213565095385356, "grad_norm": 0.087890625, "learning_rate": 0.002024770159432824, "loss": 1.213, "step": 5270 }, { "epoch": 0.4622233427282281, "grad_norm": 0.064453125, "learning_rate": 0.0020243784813033503, "loss": 1.2437, "step": 5271 }, { "epoch": 0.4623110345026027, "grad_norm": 0.0869140625, "learning_rate": 0.0020239867690315266, "loss": 1.1432, "step": 5272 }, { "epoch": 0.4623987262769772, "grad_norm": 0.11865234375, "learning_rate": 0.002023595022653074, "loss": 1.229, "step": 5273 }, { "epoch": 0.46248641805135177, "grad_norm": 0.11865234375, "learning_rate": 0.002023203242203721, "loss": 1.1857, "step": 5274 }, { "epoch": 0.4625741098257263, "grad_norm": 0.123046875, "learning_rate": 0.0020228114277191945, "loss": 1.2435, "step": 5275 }, { "epoch": 0.46266180160010084, "grad_norm": 0.0908203125, "learning_rate": 0.002022419579235229, "loss": 1.2641, "step": 5276 }, { "epoch": 0.4627494933744754, "grad_norm": 0.09814453125, "learning_rate": 0.0020220276967875587, "loss": 1.1311, "step": 5277 }, { "epoch": 0.4628371851488499, "grad_norm": 0.0615234375, "learning_rate": 0.0020216357804119234, "loss": 1.2135, "step": 5278 }, { "epoch": 0.4629248769232245, "grad_norm": 0.056884765625, "learning_rate": 0.0020212438301440636, "loss": 1.248, "step": 5279 }, { "epoch": 0.46301256869759905, "grad_norm": 0.056640625, "learning_rate": 0.002020851846019726, "loss": 1.1983, "step": 5280 }, { "epoch": 0.46310026047197356, "grad_norm": 0.06298828125, "learning_rate": 0.002020459828074656, "loss": 1.2226, "step": 5281 }, { "epoch": 0.4631879522463481, "grad_norm": 0.046630859375, "learning_rate": 0.002020067776344606, "loss": 1.1094, "step": 5282 }, { "epoch": 0.4632756440207227, "grad_norm": 0.060302734375, "learning_rate": 0.0020196756908653314, "loss": 1.236, "step": 5283 }, { "epoch": 0.4633633357950972, "grad_norm": 0.0595703125, "learning_rate": 0.0020192835716725875, "loss": 1.29, "step": 5284 }, { "epoch": 0.46345102756947176, "grad_norm": 0.07177734375, "learning_rate": 0.0020188914188021354, "loss": 1.2298, "step": 5285 }, { "epoch": 0.4635387193438463, "grad_norm": 0.1044921875, "learning_rate": 0.0020184992322897378, "loss": 1.2043, "step": 5286 }, { "epoch": 0.46362641111822084, "grad_norm": 0.076171875, "learning_rate": 0.0020181070121711615, "loss": 1.1622, "step": 5287 }, { "epoch": 0.4637141028925954, "grad_norm": 0.0771484375, "learning_rate": 0.0020177147584821763, "loss": 1.2014, "step": 5288 }, { "epoch": 0.4638017946669699, "grad_norm": 0.07373046875, "learning_rate": 0.002017322471258554, "loss": 1.1595, "step": 5289 }, { "epoch": 0.4638894864413445, "grad_norm": 0.0673828125, "learning_rate": 0.0020169301505360707, "loss": 1.1835, "step": 5290 }, { "epoch": 0.46397717821571904, "grad_norm": 0.1171875, "learning_rate": 0.0020165377963505047, "loss": 1.2518, "step": 5291 }, { "epoch": 0.46406486999009355, "grad_norm": 0.0712890625, "learning_rate": 0.002016145408737639, "loss": 1.2287, "step": 5292 }, { "epoch": 0.4641525617644681, "grad_norm": 0.107421875, "learning_rate": 0.002015752987733256, "loss": 1.2611, "step": 5293 }, { "epoch": 0.4642402535388427, "grad_norm": 0.072265625, "learning_rate": 0.002015360533373145, "loss": 1.3002, "step": 5294 }, { "epoch": 0.4643279453132172, "grad_norm": 0.07763671875, "learning_rate": 0.0020149680456930962, "loss": 1.2368, "step": 5295 }, { "epoch": 0.46441563708759176, "grad_norm": 0.10791015625, "learning_rate": 0.002014575524728904, "loss": 1.23, "step": 5296 }, { "epoch": 0.46450332886196627, "grad_norm": 0.06005859375, "learning_rate": 0.0020141829705163654, "loss": 1.2036, "step": 5297 }, { "epoch": 0.46459102063634083, "grad_norm": 0.087890625, "learning_rate": 0.00201379038309128, "loss": 1.2055, "step": 5298 }, { "epoch": 0.4646787124107154, "grad_norm": 0.052490234375, "learning_rate": 0.00201339776248945, "loss": 1.2097, "step": 5299 }, { "epoch": 0.4647664041850899, "grad_norm": 0.057861328125, "learning_rate": 0.0020130051087466835, "loss": 1.1968, "step": 5300 }, { "epoch": 0.4648540959594645, "grad_norm": 0.053955078125, "learning_rate": 0.0020126124218987876, "loss": 1.1962, "step": 5301 }, { "epoch": 0.46494178773383904, "grad_norm": 0.07470703125, "learning_rate": 0.002012219701981575, "loss": 1.1888, "step": 5302 }, { "epoch": 0.46502947950821355, "grad_norm": 0.046142578125, "learning_rate": 0.00201182694903086, "loss": 1.1466, "step": 5303 }, { "epoch": 0.4651171712825881, "grad_norm": 0.0615234375, "learning_rate": 0.0020114341630824615, "loss": 1.2171, "step": 5304 }, { "epoch": 0.4652048630569627, "grad_norm": 0.058349609375, "learning_rate": 0.002011041344172201, "loss": 1.2182, "step": 5305 }, { "epoch": 0.4652925548313372, "grad_norm": 0.054443359375, "learning_rate": 0.002010648492335901, "loss": 1.1422, "step": 5306 }, { "epoch": 0.46538024660571176, "grad_norm": 0.1064453125, "learning_rate": 0.0020102556076093906, "loss": 1.2273, "step": 5307 }, { "epoch": 0.4654679383800863, "grad_norm": 0.057861328125, "learning_rate": 0.002009862690028498, "loss": 1.257, "step": 5308 }, { "epoch": 0.46555563015446083, "grad_norm": 0.11376953125, "learning_rate": 0.0020094697396290575, "loss": 1.2696, "step": 5309 }, { "epoch": 0.4656433219288354, "grad_norm": 0.058837890625, "learning_rate": 0.0020090767564469045, "loss": 1.2189, "step": 5310 }, { "epoch": 0.4657310137032099, "grad_norm": 0.08154296875, "learning_rate": 0.002008683740517878, "loss": 1.2875, "step": 5311 }, { "epoch": 0.46581870547758447, "grad_norm": 0.05078125, "learning_rate": 0.002008290691877821, "loss": 1.3165, "step": 5312 }, { "epoch": 0.46590639725195904, "grad_norm": 0.080078125, "learning_rate": 0.0020078976105625773, "loss": 1.1964, "step": 5313 }, { "epoch": 0.46599408902633355, "grad_norm": 0.0517578125, "learning_rate": 0.0020075044966079954, "loss": 1.2166, "step": 5314 }, { "epoch": 0.4660817808007081, "grad_norm": 0.055908203125, "learning_rate": 0.0020071113500499263, "loss": 1.2053, "step": 5315 }, { "epoch": 0.4661694725750827, "grad_norm": 0.0576171875, "learning_rate": 0.002006718170924224, "loss": 1.1855, "step": 5316 }, { "epoch": 0.4662571643494572, "grad_norm": 0.049560546875, "learning_rate": 0.0020063249592667458, "loss": 1.2673, "step": 5317 }, { "epoch": 0.46634485612383175, "grad_norm": 0.058837890625, "learning_rate": 0.0020059317151133508, "loss": 1.1611, "step": 5318 }, { "epoch": 0.4664325478982063, "grad_norm": 0.05419921875, "learning_rate": 0.002005538438499902, "loss": 1.3236, "step": 5319 }, { "epoch": 0.4665202396725808, "grad_norm": 0.0888671875, "learning_rate": 0.002005145129462266, "loss": 1.1935, "step": 5320 }, { "epoch": 0.4666079314469554, "grad_norm": 0.05029296875, "learning_rate": 0.0020047517880363106, "loss": 1.2116, "step": 5321 }, { "epoch": 0.4666956232213299, "grad_norm": 0.09033203125, "learning_rate": 0.002004358414257908, "loss": 1.1729, "step": 5322 }, { "epoch": 0.46678331499570447, "grad_norm": 0.06298828125, "learning_rate": 0.002003965008162932, "loss": 1.2613, "step": 5323 }, { "epoch": 0.46687100677007903, "grad_norm": 0.06591796875, "learning_rate": 0.002003571569787262, "loss": 1.2101, "step": 5324 }, { "epoch": 0.46695869854445354, "grad_norm": 0.05908203125, "learning_rate": 0.0020031780991667776, "loss": 1.2245, "step": 5325 }, { "epoch": 0.4670463903188281, "grad_norm": 0.059326171875, "learning_rate": 0.002002784596337362, "loss": 1.2423, "step": 5326 }, { "epoch": 0.4671340820932027, "grad_norm": 0.07421875, "learning_rate": 0.002002391061334902, "loss": 1.2405, "step": 5327 }, { "epoch": 0.4672217738675772, "grad_norm": 0.09716796875, "learning_rate": 0.002001997494195287, "loss": 1.1578, "step": 5328 }, { "epoch": 0.46730946564195175, "grad_norm": 0.09716796875, "learning_rate": 0.002001603894954409, "loss": 1.1504, "step": 5329 }, { "epoch": 0.4673971574163263, "grad_norm": 0.12451171875, "learning_rate": 0.002001210263648163, "loss": 1.1721, "step": 5330 }, { "epoch": 0.4674848491907008, "grad_norm": 0.10205078125, "learning_rate": 0.0020008166003124485, "loss": 1.2225, "step": 5331 }, { "epoch": 0.4675725409650754, "grad_norm": 0.12109375, "learning_rate": 0.002000422904983165, "loss": 1.2093, "step": 5332 }, { "epoch": 0.4676602327394499, "grad_norm": 0.06494140625, "learning_rate": 0.002000029177696218, "loss": 1.2262, "step": 5333 }, { "epoch": 0.46774792451382446, "grad_norm": 0.076171875, "learning_rate": 0.001999635418487513, "loss": 1.2608, "step": 5334 }, { "epoch": 0.46783561628819903, "grad_norm": 0.0615234375, "learning_rate": 0.00199924162739296, "loss": 1.1869, "step": 5335 }, { "epoch": 0.46792330806257354, "grad_norm": 0.064453125, "learning_rate": 0.001998847804448473, "loss": 1.2006, "step": 5336 }, { "epoch": 0.4680109998369481, "grad_norm": 0.076171875, "learning_rate": 0.001998453949689966, "loss": 1.2898, "step": 5337 }, { "epoch": 0.46809869161132267, "grad_norm": 0.11279296875, "learning_rate": 0.0019980600631533587, "loss": 1.2226, "step": 5338 }, { "epoch": 0.4681863833856972, "grad_norm": 0.053466796875, "learning_rate": 0.001997666144874572, "loss": 1.1754, "step": 5339 }, { "epoch": 0.46827407516007175, "grad_norm": 0.11376953125, "learning_rate": 0.0019972721948895308, "loss": 1.1756, "step": 5340 }, { "epoch": 0.4683617669344463, "grad_norm": 0.06640625, "learning_rate": 0.0019968782132341613, "loss": 1.2428, "step": 5341 }, { "epoch": 0.4684494587088208, "grad_norm": 0.07958984375, "learning_rate": 0.001996484199944395, "loss": 1.2068, "step": 5342 }, { "epoch": 0.4685371504831954, "grad_norm": 0.061767578125, "learning_rate": 0.0019960901550561635, "loss": 1.1748, "step": 5343 }, { "epoch": 0.46862484225756995, "grad_norm": 0.05712890625, "learning_rate": 0.001995696078605403, "loss": 1.2204, "step": 5344 }, { "epoch": 0.46871253403194446, "grad_norm": 0.07470703125, "learning_rate": 0.0019953019706280537, "loss": 1.2236, "step": 5345 }, { "epoch": 0.468800225806319, "grad_norm": 0.061279296875, "learning_rate": 0.001994907831160055, "loss": 1.2072, "step": 5346 }, { "epoch": 0.46888791758069354, "grad_norm": 0.05810546875, "learning_rate": 0.0019945136602373533, "loss": 1.2561, "step": 5347 }, { "epoch": 0.4689756093550681, "grad_norm": 0.054443359375, "learning_rate": 0.001994119457895894, "loss": 1.1965, "step": 5348 }, { "epoch": 0.46906330112944267, "grad_norm": 0.06005859375, "learning_rate": 0.001993725224171629, "loss": 1.1925, "step": 5349 }, { "epoch": 0.4691509929038172, "grad_norm": 0.058349609375, "learning_rate": 0.001993330959100511, "loss": 1.1818, "step": 5350 }, { "epoch": 0.46923868467819174, "grad_norm": 0.068359375, "learning_rate": 0.0019929366627184967, "loss": 1.2653, "step": 5351 }, { "epoch": 0.4693263764525663, "grad_norm": 0.09326171875, "learning_rate": 0.0019925423350615427, "loss": 1.3172, "step": 5352 }, { "epoch": 0.4694140682269408, "grad_norm": 0.0732421875, "learning_rate": 0.0019921479761656124, "loss": 1.1954, "step": 5353 }, { "epoch": 0.4695017600013154, "grad_norm": 0.05810546875, "learning_rate": 0.00199175358606667, "loss": 1.1971, "step": 5354 }, { "epoch": 0.46958945177568995, "grad_norm": 0.119140625, "learning_rate": 0.001991359164800683, "loss": 1.1419, "step": 5355 }, { "epoch": 0.46967714355006446, "grad_norm": 0.07763671875, "learning_rate": 0.0019909647124036214, "loss": 1.2748, "step": 5356 }, { "epoch": 0.469764835324439, "grad_norm": 0.109375, "learning_rate": 0.0019905702289114576, "loss": 1.2055, "step": 5357 }, { "epoch": 0.46985252709881353, "grad_norm": 0.10302734375, "learning_rate": 0.0019901757143601685, "loss": 1.2398, "step": 5358 }, { "epoch": 0.4699402188731881, "grad_norm": 0.0703125, "learning_rate": 0.0019897811687857327, "loss": 1.2555, "step": 5359 }, { "epoch": 0.47002791064756266, "grad_norm": 0.11328125, "learning_rate": 0.0019893865922241314, "loss": 1.2522, "step": 5360 }, { "epoch": 0.4701156024219372, "grad_norm": 0.053955078125, "learning_rate": 0.0019889919847113483, "loss": 1.1941, "step": 5361 }, { "epoch": 0.47020329419631174, "grad_norm": 0.11279296875, "learning_rate": 0.001988597346283372, "loss": 1.2167, "step": 5362 }, { "epoch": 0.4702909859706863, "grad_norm": 0.06494140625, "learning_rate": 0.001988202676976192, "loss": 1.2247, "step": 5363 }, { "epoch": 0.4703786777450608, "grad_norm": 0.1708984375, "learning_rate": 0.0019878079768258, "loss": 1.3217, "step": 5364 }, { "epoch": 0.4704663695194354, "grad_norm": 0.052001953125, "learning_rate": 0.0019874132458681934, "loss": 1.2529, "step": 5365 }, { "epoch": 0.47055406129380994, "grad_norm": 0.06884765625, "learning_rate": 0.0019870184841393693, "loss": 1.1921, "step": 5366 }, { "epoch": 0.47064175306818445, "grad_norm": 0.07568359375, "learning_rate": 0.0019866236916753297, "loss": 1.2275, "step": 5367 }, { "epoch": 0.470729444842559, "grad_norm": 0.055908203125, "learning_rate": 0.0019862288685120787, "loss": 1.1602, "step": 5368 }, { "epoch": 0.47081713661693353, "grad_norm": 0.0556640625, "learning_rate": 0.0019858340146856226, "loss": 1.2181, "step": 5369 }, { "epoch": 0.4709048283913081, "grad_norm": 0.056396484375, "learning_rate": 0.001985439130231971, "loss": 1.2253, "step": 5370 }, { "epoch": 0.47099252016568266, "grad_norm": 0.0517578125, "learning_rate": 0.001985044215187137, "loss": 1.1737, "step": 5371 }, { "epoch": 0.47108021194005717, "grad_norm": 0.052978515625, "learning_rate": 0.001984649269587135, "loss": 1.2304, "step": 5372 }, { "epoch": 0.47116790371443174, "grad_norm": 0.058349609375, "learning_rate": 0.0019842542934679843, "loss": 1.2461, "step": 5373 }, { "epoch": 0.4712555954888063, "grad_norm": 0.057373046875, "learning_rate": 0.001983859286865704, "loss": 1.219, "step": 5374 }, { "epoch": 0.4713432872631808, "grad_norm": 0.05224609375, "learning_rate": 0.0019834642498163194, "loss": 1.196, "step": 5375 }, { "epoch": 0.4714309790375554, "grad_norm": 0.0673828125, "learning_rate": 0.0019830691823558557, "loss": 1.2245, "step": 5376 }, { "epoch": 0.47151867081192994, "grad_norm": 0.0693359375, "learning_rate": 0.0019826740845203424, "loss": 1.2074, "step": 5377 }, { "epoch": 0.47160636258630445, "grad_norm": 0.0537109375, "learning_rate": 0.001982278956345811, "loss": 1.2248, "step": 5378 }, { "epoch": 0.471694054360679, "grad_norm": 0.07763671875, "learning_rate": 0.0019818837978682973, "loss": 1.2055, "step": 5379 }, { "epoch": 0.4717817461350536, "grad_norm": 0.10302734375, "learning_rate": 0.0019814886091238377, "loss": 1.2792, "step": 5380 }, { "epoch": 0.4718694379094281, "grad_norm": 0.04736328125, "learning_rate": 0.0019810933901484723, "loss": 1.1663, "step": 5381 }, { "epoch": 0.47195712968380266, "grad_norm": 0.07373046875, "learning_rate": 0.001980698140978245, "loss": 1.2381, "step": 5382 }, { "epoch": 0.47204482145817717, "grad_norm": 0.07861328125, "learning_rate": 0.0019803028616492005, "loss": 1.1869, "step": 5383 }, { "epoch": 0.47213251323255173, "grad_norm": 0.109375, "learning_rate": 0.001979907552197388, "loss": 1.193, "step": 5384 }, { "epoch": 0.4722202050069263, "grad_norm": 0.083984375, "learning_rate": 0.0019795122126588585, "loss": 1.1888, "step": 5385 }, { "epoch": 0.4723078967813008, "grad_norm": 0.07666015625, "learning_rate": 0.0019791168430696652, "loss": 1.242, "step": 5386 }, { "epoch": 0.47239558855567537, "grad_norm": 0.1064453125, "learning_rate": 0.001978721443465866, "loss": 1.2006, "step": 5387 }, { "epoch": 0.47248328033004994, "grad_norm": 0.06298828125, "learning_rate": 0.0019783260138835196, "loss": 1.3229, "step": 5388 }, { "epoch": 0.47257097210442445, "grad_norm": 0.1044921875, "learning_rate": 0.001977930554358688, "loss": 1.2101, "step": 5389 }, { "epoch": 0.472658663878799, "grad_norm": 0.045166015625, "learning_rate": 0.0019775350649274366, "loss": 1.1683, "step": 5390 }, { "epoch": 0.4727463556531736, "grad_norm": 0.0888671875, "learning_rate": 0.0019771395456258333, "loss": 1.2063, "step": 5391 }, { "epoch": 0.4728340474275481, "grad_norm": 0.051025390625, "learning_rate": 0.0019767439964899474, "loss": 1.2044, "step": 5392 }, { "epoch": 0.47292173920192265, "grad_norm": 0.064453125, "learning_rate": 0.001976348417555853, "loss": 1.2309, "step": 5393 }, { "epoch": 0.47300943097629716, "grad_norm": 0.07470703125, "learning_rate": 0.001975952808859625, "loss": 1.2115, "step": 5394 }, { "epoch": 0.47309712275067173, "grad_norm": 0.05810546875, "learning_rate": 0.0019755571704373424, "loss": 1.2185, "step": 5395 }, { "epoch": 0.4731848145250463, "grad_norm": 0.05859375, "learning_rate": 0.0019751615023250856, "loss": 1.2366, "step": 5396 }, { "epoch": 0.4732725062994208, "grad_norm": 0.062255859375, "learning_rate": 0.0019747658045589397, "loss": 1.1983, "step": 5397 }, { "epoch": 0.47336019807379537, "grad_norm": 0.048828125, "learning_rate": 0.0019743700771749913, "loss": 1.1653, "step": 5398 }, { "epoch": 0.47344788984816993, "grad_norm": 0.05126953125, "learning_rate": 0.0019739743202093285, "loss": 1.188, "step": 5399 }, { "epoch": 0.47353558162254444, "grad_norm": 0.07666015625, "learning_rate": 0.0019735785336980447, "loss": 1.1658, "step": 5400 }, { "epoch": 0.473623273396919, "grad_norm": 0.055908203125, "learning_rate": 0.001973182717677233, "loss": 1.1927, "step": 5401 }, { "epoch": 0.4737109651712936, "grad_norm": 0.05224609375, "learning_rate": 0.0019727868721829923, "loss": 1.2371, "step": 5402 }, { "epoch": 0.4737986569456681, "grad_norm": 0.07275390625, "learning_rate": 0.001972390997251422, "loss": 1.2674, "step": 5403 }, { "epoch": 0.47388634872004265, "grad_norm": 0.0888671875, "learning_rate": 0.0019719950929186253, "loss": 1.2601, "step": 5404 }, { "epoch": 0.47397404049441716, "grad_norm": 0.10302734375, "learning_rate": 0.001971599159220707, "loss": 1.1893, "step": 5405 }, { "epoch": 0.4740617322687917, "grad_norm": 0.06298828125, "learning_rate": 0.0019712031961937756, "loss": 1.1899, "step": 5406 }, { "epoch": 0.4741494240431663, "grad_norm": 0.10595703125, "learning_rate": 0.0019708072038739422, "loss": 1.2367, "step": 5407 }, { "epoch": 0.4742371158175408, "grad_norm": 0.0556640625, "learning_rate": 0.00197041118229732, "loss": 1.2481, "step": 5408 }, { "epoch": 0.47432480759191537, "grad_norm": 0.091796875, "learning_rate": 0.0019700151315000247, "loss": 1.2374, "step": 5409 }, { "epoch": 0.47441249936628993, "grad_norm": 0.068359375, "learning_rate": 0.001969619051518176, "loss": 1.234, "step": 5410 }, { "epoch": 0.47450019114066444, "grad_norm": 0.091796875, "learning_rate": 0.001969222942387895, "loss": 1.2111, "step": 5411 }, { "epoch": 0.474587882915039, "grad_norm": 0.0849609375, "learning_rate": 0.001968826804145305, "loss": 1.212, "step": 5412 }, { "epoch": 0.47467557468941357, "grad_norm": 0.06884765625, "learning_rate": 0.001968430636826534, "loss": 1.1464, "step": 5413 }, { "epoch": 0.4747632664637881, "grad_norm": 0.061279296875, "learning_rate": 0.0019680344404677105, "loss": 1.1829, "step": 5414 }, { "epoch": 0.47485095823816265, "grad_norm": 0.06298828125, "learning_rate": 0.0019676382151049675, "loss": 1.2464, "step": 5415 }, { "epoch": 0.47493865001253716, "grad_norm": 0.0693359375, "learning_rate": 0.001967241960774439, "loss": 1.2853, "step": 5416 }, { "epoch": 0.4750263417869117, "grad_norm": 0.072265625, "learning_rate": 0.001966845677512263, "loss": 1.1343, "step": 5417 }, { "epoch": 0.4751140335612863, "grad_norm": 0.0966796875, "learning_rate": 0.001966449365354579, "loss": 1.2139, "step": 5418 }, { "epoch": 0.4752017253356608, "grad_norm": 0.0654296875, "learning_rate": 0.0019660530243375295, "loss": 1.1973, "step": 5419 }, { "epoch": 0.47528941711003536, "grad_norm": 0.0615234375, "learning_rate": 0.0019656566544972603, "loss": 1.2211, "step": 5420 }, { "epoch": 0.4753771088844099, "grad_norm": 0.0908203125, "learning_rate": 0.001965260255869919, "loss": 1.1863, "step": 5421 }, { "epoch": 0.47546480065878444, "grad_norm": 0.06494140625, "learning_rate": 0.001964863828491656, "loss": 1.1678, "step": 5422 }, { "epoch": 0.475552492433159, "grad_norm": 0.060546875, "learning_rate": 0.0019644673723986246, "loss": 1.1527, "step": 5423 }, { "epoch": 0.47564018420753357, "grad_norm": 0.06591796875, "learning_rate": 0.001964070887626981, "loss": 1.1442, "step": 5424 }, { "epoch": 0.4757278759819081, "grad_norm": 0.056884765625, "learning_rate": 0.0019636743742128832, "loss": 1.2081, "step": 5425 }, { "epoch": 0.47581556775628264, "grad_norm": 0.06494140625, "learning_rate": 0.0019632778321924923, "loss": 1.2034, "step": 5426 }, { "epoch": 0.4759032595306572, "grad_norm": 0.0546875, "learning_rate": 0.001962881261601972, "loss": 1.1485, "step": 5427 }, { "epoch": 0.4759909513050317, "grad_norm": 0.06396484375, "learning_rate": 0.0019624846624774874, "loss": 1.2243, "step": 5428 }, { "epoch": 0.4760786430794063, "grad_norm": 0.05810546875, "learning_rate": 0.0019620880348552093, "loss": 1.2158, "step": 5429 }, { "epoch": 0.4761663348537808, "grad_norm": 0.061767578125, "learning_rate": 0.0019616913787713075, "loss": 1.2365, "step": 5430 }, { "epoch": 0.47625402662815536, "grad_norm": 0.08984375, "learning_rate": 0.001961294694261957, "loss": 1.2368, "step": 5431 }, { "epoch": 0.4763417184025299, "grad_norm": 0.0517578125, "learning_rate": 0.001960897981363333, "loss": 1.2043, "step": 5432 }, { "epoch": 0.47642941017690443, "grad_norm": 0.05126953125, "learning_rate": 0.0019605012401116167, "loss": 1.2459, "step": 5433 }, { "epoch": 0.476517101951279, "grad_norm": 0.10888671875, "learning_rate": 0.0019601044705429885, "loss": 1.2918, "step": 5434 }, { "epoch": 0.47660479372565356, "grad_norm": 0.058837890625, "learning_rate": 0.001959707672693633, "loss": 1.2426, "step": 5435 }, { "epoch": 0.4766924855000281, "grad_norm": 0.1484375, "learning_rate": 0.001959310846599738, "loss": 1.2625, "step": 5436 }, { "epoch": 0.47678017727440264, "grad_norm": 0.056884765625, "learning_rate": 0.0019589139922974916, "loss": 1.3093, "step": 5437 }, { "epoch": 0.4768678690487772, "grad_norm": 0.146484375, "learning_rate": 0.0019585171098230867, "loss": 1.2185, "step": 5438 }, { "epoch": 0.4769555608231517, "grad_norm": 0.060546875, "learning_rate": 0.0019581201992127173, "loss": 1.2195, "step": 5439 }, { "epoch": 0.4770432525975263, "grad_norm": 0.1240234375, "learning_rate": 0.001957723260502582, "loss": 1.201, "step": 5440 }, { "epoch": 0.4771309443719008, "grad_norm": 0.07373046875, "learning_rate": 0.001957326293728879, "loss": 1.2181, "step": 5441 }, { "epoch": 0.47721863614627535, "grad_norm": 0.1357421875, "learning_rate": 0.001956929298927812, "loss": 1.2474, "step": 5442 }, { "epoch": 0.4773063279206499, "grad_norm": 0.07177734375, "learning_rate": 0.0019565322761355844, "loss": 1.1798, "step": 5443 }, { "epoch": 0.47739401969502443, "grad_norm": 0.09228515625, "learning_rate": 0.0019561352253884058, "loss": 1.1936, "step": 5444 }, { "epoch": 0.477481711469399, "grad_norm": 0.10009765625, "learning_rate": 0.001955738146722484, "loss": 1.1683, "step": 5445 }, { "epoch": 0.47756940324377356, "grad_norm": 0.06591796875, "learning_rate": 0.0019553410401740323, "loss": 1.2151, "step": 5446 }, { "epoch": 0.47765709501814807, "grad_norm": 0.10302734375, "learning_rate": 0.0019549439057792665, "loss": 1.1989, "step": 5447 }, { "epoch": 0.47774478679252264, "grad_norm": 0.062255859375, "learning_rate": 0.0019545467435744036, "loss": 1.171, "step": 5448 }, { "epoch": 0.4778324785668972, "grad_norm": 0.07177734375, "learning_rate": 0.0019541495535956637, "loss": 1.1727, "step": 5449 }, { "epoch": 0.4779201703412717, "grad_norm": 0.07177734375, "learning_rate": 0.0019537523358792697, "loss": 1.2045, "step": 5450 }, { "epoch": 0.4780078621156463, "grad_norm": 0.068359375, "learning_rate": 0.0019533550904614473, "loss": 1.2046, "step": 5451 }, { "epoch": 0.4780955538900208, "grad_norm": 0.09423828125, "learning_rate": 0.0019529578173784234, "loss": 1.2, "step": 5452 }, { "epoch": 0.47818324566439535, "grad_norm": 0.064453125, "learning_rate": 0.0019525605166664285, "loss": 1.184, "step": 5453 }, { "epoch": 0.4782709374387699, "grad_norm": 0.0615234375, "learning_rate": 0.0019521631883616956, "loss": 1.1673, "step": 5454 }, { "epoch": 0.4783586292131444, "grad_norm": 0.08984375, "learning_rate": 0.0019517658325004603, "loss": 1.2285, "step": 5455 }, { "epoch": 0.478446320987519, "grad_norm": 0.080078125, "learning_rate": 0.0019513684491189595, "loss": 1.2298, "step": 5456 }, { "epoch": 0.47853401276189356, "grad_norm": 0.06884765625, "learning_rate": 0.0019509710382534346, "loss": 1.1992, "step": 5457 }, { "epoch": 0.47862170453626807, "grad_norm": 0.09130859375, "learning_rate": 0.0019505735999401275, "loss": 1.208, "step": 5458 }, { "epoch": 0.47870939631064263, "grad_norm": 0.10595703125, "learning_rate": 0.0019501761342152844, "loss": 1.2027, "step": 5459 }, { "epoch": 0.4787970880850172, "grad_norm": 0.123046875, "learning_rate": 0.0019497786411151524, "loss": 1.2381, "step": 5460 }, { "epoch": 0.4788847798593917, "grad_norm": 0.130859375, "learning_rate": 0.0019493811206759827, "loss": 1.1965, "step": 5461 }, { "epoch": 0.4789724716337663, "grad_norm": 0.07568359375, "learning_rate": 0.0019489835729340273, "loss": 1.2608, "step": 5462 }, { "epoch": 0.47906016340814084, "grad_norm": 0.1259765625, "learning_rate": 0.001948585997925542, "loss": 1.167, "step": 5463 }, { "epoch": 0.47914785518251535, "grad_norm": 0.055908203125, "learning_rate": 0.0019481883956867846, "loss": 1.2074, "step": 5464 }, { "epoch": 0.4792355469568899, "grad_norm": 0.0634765625, "learning_rate": 0.001947790766254015, "loss": 1.2072, "step": 5465 }, { "epoch": 0.4793232387312644, "grad_norm": 0.08740234375, "learning_rate": 0.0019473931096634963, "loss": 1.2494, "step": 5466 }, { "epoch": 0.479410930505639, "grad_norm": 0.049560546875, "learning_rate": 0.0019469954259514939, "loss": 1.2451, "step": 5467 }, { "epoch": 0.47949862228001355, "grad_norm": 0.0751953125, "learning_rate": 0.0019465977151542752, "loss": 1.2152, "step": 5468 }, { "epoch": 0.47958631405438806, "grad_norm": 0.05078125, "learning_rate": 0.0019461999773081108, "loss": 1.1444, "step": 5469 }, { "epoch": 0.47967400582876263, "grad_norm": 0.05029296875, "learning_rate": 0.0019458022124492727, "loss": 1.1401, "step": 5470 }, { "epoch": 0.4797616976031372, "grad_norm": 0.05322265625, "learning_rate": 0.0019454044206140368, "loss": 1.157, "step": 5471 }, { "epoch": 0.4798493893775117, "grad_norm": 0.0791015625, "learning_rate": 0.0019450066018386798, "loss": 1.239, "step": 5472 }, { "epoch": 0.47993708115188627, "grad_norm": 0.0654296875, "learning_rate": 0.001944608756159483, "loss": 1.1504, "step": 5473 }, { "epoch": 0.48002477292626083, "grad_norm": 0.056640625, "learning_rate": 0.0019442108836127275, "loss": 1.2241, "step": 5474 }, { "epoch": 0.48011246470063534, "grad_norm": 0.107421875, "learning_rate": 0.0019438129842346992, "loss": 1.1719, "step": 5475 }, { "epoch": 0.4802001564750099, "grad_norm": 0.06884765625, "learning_rate": 0.001943415058061685, "loss": 1.1424, "step": 5476 }, { "epoch": 0.4802878482493844, "grad_norm": 0.10595703125, "learning_rate": 0.0019430171051299753, "loss": 1.1998, "step": 5477 }, { "epoch": 0.480375540023759, "grad_norm": 0.06494140625, "learning_rate": 0.001942619125475862, "loss": 1.1683, "step": 5478 }, { "epoch": 0.48046323179813355, "grad_norm": 0.11474609375, "learning_rate": 0.0019422211191356396, "loss": 1.1765, "step": 5479 }, { "epoch": 0.48055092357250806, "grad_norm": 0.09423828125, "learning_rate": 0.0019418230861456055, "loss": 1.2355, "step": 5480 }, { "epoch": 0.4806386153468826, "grad_norm": 0.06689453125, "learning_rate": 0.0019414250265420594, "loss": 1.2041, "step": 5481 }, { "epoch": 0.4807263071212572, "grad_norm": 0.0693359375, "learning_rate": 0.0019410269403613031, "loss": 1.171, "step": 5482 }, { "epoch": 0.4808139988956317, "grad_norm": 0.052001953125, "learning_rate": 0.0019406288276396415, "loss": 1.1798, "step": 5483 }, { "epoch": 0.48090169067000627, "grad_norm": 0.051025390625, "learning_rate": 0.0019402306884133806, "loss": 1.1733, "step": 5484 }, { "epoch": 0.48098938244438083, "grad_norm": 0.119140625, "learning_rate": 0.0019398325227188305, "loss": 1.2217, "step": 5485 }, { "epoch": 0.48107707421875534, "grad_norm": 0.05517578125, "learning_rate": 0.0019394343305923025, "loss": 1.2762, "step": 5486 }, { "epoch": 0.4811647659931299, "grad_norm": 0.0947265625, "learning_rate": 0.0019390361120701105, "loss": 1.2468, "step": 5487 }, { "epoch": 0.4812524577675044, "grad_norm": 0.06591796875, "learning_rate": 0.0019386378671885712, "loss": 1.2049, "step": 5488 }, { "epoch": 0.481340149541879, "grad_norm": 0.05322265625, "learning_rate": 0.001938239595984004, "loss": 1.1584, "step": 5489 }, { "epoch": 0.48142784131625355, "grad_norm": 0.12109375, "learning_rate": 0.001937841298492729, "loss": 1.1926, "step": 5490 }, { "epoch": 0.48151553309062806, "grad_norm": 0.061767578125, "learning_rate": 0.0019374429747510712, "loss": 1.1512, "step": 5491 }, { "epoch": 0.4816032248650026, "grad_norm": 0.1357421875, "learning_rate": 0.0019370446247953558, "loss": 1.1684, "step": 5492 }, { "epoch": 0.4816909166393772, "grad_norm": 0.07470703125, "learning_rate": 0.0019366462486619118, "loss": 1.189, "step": 5493 }, { "epoch": 0.4817786084137517, "grad_norm": 0.08349609375, "learning_rate": 0.0019362478463870697, "loss": 1.3083, "step": 5494 }, { "epoch": 0.48186630018812626, "grad_norm": 0.076171875, "learning_rate": 0.0019358494180071635, "loss": 1.1765, "step": 5495 }, { "epoch": 0.4819539919625008, "grad_norm": 0.059326171875, "learning_rate": 0.0019354509635585277, "loss": 1.2896, "step": 5496 }, { "epoch": 0.48204168373687534, "grad_norm": 0.059814453125, "learning_rate": 0.0019350524830775014, "loss": 1.271, "step": 5497 }, { "epoch": 0.4821293755112499, "grad_norm": 0.08349609375, "learning_rate": 0.0019346539766004242, "loss": 1.1622, "step": 5498 }, { "epoch": 0.48221706728562447, "grad_norm": 0.058349609375, "learning_rate": 0.0019342554441636388, "loss": 1.2104, "step": 5499 }, { "epoch": 0.482304759059999, "grad_norm": 0.06640625, "learning_rate": 0.001933856885803491, "loss": 1.2401, "step": 5500 }, { "epoch": 0.482304759059999, "eval_loss": 1.219316005706787, "eval_runtime": 429.1773, "eval_samples_per_second": 33.662, "eval_steps_per_second": 8.416, "step": 5500 }, { "epoch": 0.48239245083437354, "grad_norm": 0.07958984375, "learning_rate": 0.001933458301556328, "loss": 1.197, "step": 5501 }, { "epoch": 0.48248014260874805, "grad_norm": 0.09033203125, "learning_rate": 0.0019330596914585, "loss": 1.1628, "step": 5502 }, { "epoch": 0.4825678343831226, "grad_norm": 0.09814453125, "learning_rate": 0.0019326610555463588, "loss": 1.1657, "step": 5503 }, { "epoch": 0.4826555261574972, "grad_norm": 0.07177734375, "learning_rate": 0.001932262393856259, "loss": 1.2219, "step": 5504 }, { "epoch": 0.4827432179318717, "grad_norm": 0.07763671875, "learning_rate": 0.0019318637064245571, "loss": 1.2057, "step": 5505 }, { "epoch": 0.48283090970624626, "grad_norm": 0.057373046875, "learning_rate": 0.001931464993287613, "loss": 1.2169, "step": 5506 }, { "epoch": 0.4829186014806208, "grad_norm": 0.050537109375, "learning_rate": 0.001931066254481788, "loss": 1.2064, "step": 5507 }, { "epoch": 0.48300629325499533, "grad_norm": 0.059814453125, "learning_rate": 0.0019306674900434464, "loss": 1.1401, "step": 5508 }, { "epoch": 0.4830939850293699, "grad_norm": 0.080078125, "learning_rate": 0.001930268700008954, "loss": 1.2172, "step": 5509 }, { "epoch": 0.48318167680374446, "grad_norm": 0.06396484375, "learning_rate": 0.0019298698844146798, "loss": 1.23, "step": 5510 }, { "epoch": 0.483269368578119, "grad_norm": 0.06396484375, "learning_rate": 0.0019294710432969946, "loss": 1.1839, "step": 5511 }, { "epoch": 0.48335706035249354, "grad_norm": 0.072265625, "learning_rate": 0.001929072176692272, "loss": 1.1342, "step": 5512 }, { "epoch": 0.48344475212686805, "grad_norm": 0.083984375, "learning_rate": 0.001928673284636887, "loss": 1.2556, "step": 5513 }, { "epoch": 0.4835324439012426, "grad_norm": 0.060546875, "learning_rate": 0.0019282743671672174, "loss": 1.1949, "step": 5514 }, { "epoch": 0.4836201356756172, "grad_norm": 0.07470703125, "learning_rate": 0.0019278754243196449, "loss": 1.1782, "step": 5515 }, { "epoch": 0.4837078274499917, "grad_norm": 0.053955078125, "learning_rate": 0.0019274764561305502, "loss": 1.1858, "step": 5516 }, { "epoch": 0.48379551922436625, "grad_norm": 0.0615234375, "learning_rate": 0.0019270774626363194, "loss": 1.2663, "step": 5517 }, { "epoch": 0.4838832109987408, "grad_norm": 0.126953125, "learning_rate": 0.0019266784438733387, "loss": 1.2028, "step": 5518 }, { "epoch": 0.48397090277311533, "grad_norm": 0.055419921875, "learning_rate": 0.001926279399877999, "loss": 1.2119, "step": 5519 }, { "epoch": 0.4840585945474899, "grad_norm": 0.1484375, "learning_rate": 0.001925880330686691, "loss": 1.2714, "step": 5520 }, { "epoch": 0.48414628632186446, "grad_norm": 0.061767578125, "learning_rate": 0.0019254812363358087, "loss": 1.2335, "step": 5521 }, { "epoch": 0.48423397809623897, "grad_norm": 0.08935546875, "learning_rate": 0.0019250821168617493, "loss": 1.2111, "step": 5522 }, { "epoch": 0.48432166987061354, "grad_norm": 0.07568359375, "learning_rate": 0.0019246829723009102, "loss": 1.2028, "step": 5523 }, { "epoch": 0.48440936164498805, "grad_norm": 0.05078125, "learning_rate": 0.0019242838026896935, "loss": 1.1844, "step": 5524 }, { "epoch": 0.4844970534193626, "grad_norm": 0.06396484375, "learning_rate": 0.0019238846080645022, "loss": 1.1689, "step": 5525 }, { "epoch": 0.4845847451937372, "grad_norm": 0.05810546875, "learning_rate": 0.0019234853884617417, "loss": 1.1467, "step": 5526 }, { "epoch": 0.4846724369681117, "grad_norm": 0.051025390625, "learning_rate": 0.0019230861439178194, "loss": 1.218, "step": 5527 }, { "epoch": 0.48476012874248625, "grad_norm": 0.059326171875, "learning_rate": 0.0019226868744691462, "loss": 1.2652, "step": 5528 }, { "epoch": 0.4848478205168608, "grad_norm": 0.0537109375, "learning_rate": 0.001922287580152134, "loss": 1.2123, "step": 5529 }, { "epoch": 0.4849355122912353, "grad_norm": 0.057861328125, "learning_rate": 0.001921888261003197, "loss": 1.1717, "step": 5530 }, { "epoch": 0.4850232040656099, "grad_norm": 0.053955078125, "learning_rate": 0.0019214889170587528, "loss": 1.1532, "step": 5531 }, { "epoch": 0.48511089583998446, "grad_norm": 0.051513671875, "learning_rate": 0.00192108954835522, "loss": 1.248, "step": 5532 }, { "epoch": 0.48519858761435897, "grad_norm": 0.09130859375, "learning_rate": 0.0019206901549290202, "loss": 1.1965, "step": 5533 }, { "epoch": 0.48528627938873353, "grad_norm": 0.10791015625, "learning_rate": 0.001920290736816577, "loss": 1.2204, "step": 5534 }, { "epoch": 0.4853739711631081, "grad_norm": 0.09326171875, "learning_rate": 0.0019198912940543167, "loss": 1.1857, "step": 5535 }, { "epoch": 0.4854616629374826, "grad_norm": 0.12890625, "learning_rate": 0.0019194918266786667, "loss": 1.2625, "step": 5536 }, { "epoch": 0.4855493547118572, "grad_norm": 0.0625, "learning_rate": 0.0019190923347260586, "loss": 1.1409, "step": 5537 }, { "epoch": 0.4856370464862317, "grad_norm": 0.07958984375, "learning_rate": 0.001918692818232923, "loss": 1.2703, "step": 5538 }, { "epoch": 0.48572473826060625, "grad_norm": 0.0546875, "learning_rate": 0.001918293277235697, "loss": 1.1986, "step": 5539 }, { "epoch": 0.4858124300349808, "grad_norm": 0.05126953125, "learning_rate": 0.0019178937117708163, "loss": 1.1772, "step": 5540 }, { "epoch": 0.4859001218093553, "grad_norm": 0.058349609375, "learning_rate": 0.0019174941218747204, "loss": 1.1859, "step": 5541 }, { "epoch": 0.4859878135837299, "grad_norm": 0.058349609375, "learning_rate": 0.0019170945075838515, "loss": 1.1774, "step": 5542 }, { "epoch": 0.48607550535810445, "grad_norm": 0.05859375, "learning_rate": 0.0019166948689346528, "loss": 1.1609, "step": 5543 }, { "epoch": 0.48616319713247896, "grad_norm": 0.06494140625, "learning_rate": 0.0019162952059635706, "loss": 1.2435, "step": 5544 }, { "epoch": 0.48625088890685353, "grad_norm": 0.06005859375, "learning_rate": 0.0019158955187070536, "loss": 1.1604, "step": 5545 }, { "epoch": 0.4863385806812281, "grad_norm": 0.06201171875, "learning_rate": 0.0019154958072015512, "loss": 1.2685, "step": 5546 }, { "epoch": 0.4864262724556026, "grad_norm": 0.0556640625, "learning_rate": 0.0019150960714835162, "loss": 1.2395, "step": 5547 }, { "epoch": 0.48651396422997717, "grad_norm": 0.06787109375, "learning_rate": 0.0019146963115894045, "loss": 1.2042, "step": 5548 }, { "epoch": 0.4866016560043517, "grad_norm": 0.0595703125, "learning_rate": 0.0019142965275556717, "loss": 1.2067, "step": 5549 }, { "epoch": 0.48668934777872624, "grad_norm": 0.07470703125, "learning_rate": 0.0019138967194187785, "loss": 1.2103, "step": 5550 }, { "epoch": 0.4867770395531008, "grad_norm": 0.09130859375, "learning_rate": 0.0019134968872151856, "loss": 1.1962, "step": 5551 }, { "epoch": 0.4868647313274753, "grad_norm": 0.07080078125, "learning_rate": 0.001913097030981357, "loss": 1.1923, "step": 5552 }, { "epoch": 0.4869524231018499, "grad_norm": 0.060302734375, "learning_rate": 0.0019126971507537577, "loss": 1.1955, "step": 5553 }, { "epoch": 0.48704011487622445, "grad_norm": 0.05126953125, "learning_rate": 0.0019122972465688568, "loss": 1.235, "step": 5554 }, { "epoch": 0.48712780665059896, "grad_norm": 0.0537109375, "learning_rate": 0.0019118973184631244, "loss": 1.2281, "step": 5555 }, { "epoch": 0.4872154984249735, "grad_norm": 0.06298828125, "learning_rate": 0.001911497366473032, "loss": 1.2281, "step": 5556 }, { "epoch": 0.4873031901993481, "grad_norm": 0.05517578125, "learning_rate": 0.0019110973906350556, "loss": 1.2055, "step": 5557 }, { "epoch": 0.4873908819737226, "grad_norm": 0.05078125, "learning_rate": 0.0019106973909856708, "loss": 1.1741, "step": 5558 }, { "epoch": 0.48747857374809717, "grad_norm": 0.06640625, "learning_rate": 0.0019102973675613568, "loss": 1.1682, "step": 5559 }, { "epoch": 0.4875662655224717, "grad_norm": 0.06396484375, "learning_rate": 0.0019098973203985948, "loss": 1.2268, "step": 5560 }, { "epoch": 0.48765395729684624, "grad_norm": 0.1181640625, "learning_rate": 0.0019094972495338684, "loss": 1.2113, "step": 5561 }, { "epoch": 0.4877416490712208, "grad_norm": 0.047119140625, "learning_rate": 0.0019090971550036627, "loss": 1.1406, "step": 5562 }, { "epoch": 0.4878293408455953, "grad_norm": 0.09912109375, "learning_rate": 0.0019086970368444654, "loss": 1.1912, "step": 5563 }, { "epoch": 0.4879170326199699, "grad_norm": 0.052978515625, "learning_rate": 0.001908296895092766, "loss": 1.187, "step": 5564 }, { "epoch": 0.48800472439434445, "grad_norm": 0.0595703125, "learning_rate": 0.0019078967297850562, "loss": 1.2473, "step": 5565 }, { "epoch": 0.48809241616871896, "grad_norm": 0.06689453125, "learning_rate": 0.0019074965409578307, "loss": 1.2614, "step": 5566 }, { "epoch": 0.4881801079430935, "grad_norm": 0.06396484375, "learning_rate": 0.0019070963286475858, "loss": 1.1948, "step": 5567 }, { "epoch": 0.4882677997174681, "grad_norm": 0.059814453125, "learning_rate": 0.001906696092890819, "loss": 1.2163, "step": 5568 }, { "epoch": 0.4883554914918426, "grad_norm": 0.10595703125, "learning_rate": 0.0019062958337240311, "loss": 1.1694, "step": 5569 }, { "epoch": 0.48844318326621716, "grad_norm": 0.0712890625, "learning_rate": 0.0019058955511837246, "loss": 1.1762, "step": 5570 }, { "epoch": 0.4885308750405917, "grad_norm": 0.0751953125, "learning_rate": 0.001905495245306405, "loss": 1.1537, "step": 5571 }, { "epoch": 0.48861856681496624, "grad_norm": 0.06689453125, "learning_rate": 0.0019050949161285781, "loss": 1.2167, "step": 5572 }, { "epoch": 0.4887062585893408, "grad_norm": 0.056640625, "learning_rate": 0.001904694563686754, "loss": 1.1874, "step": 5573 }, { "epoch": 0.4887939503637153, "grad_norm": 0.053466796875, "learning_rate": 0.0019042941880174425, "loss": 1.2592, "step": 5574 }, { "epoch": 0.4888816421380899, "grad_norm": 0.053955078125, "learning_rate": 0.0019038937891571578, "loss": 1.2474, "step": 5575 }, { "epoch": 0.48896933391246444, "grad_norm": 0.06787109375, "learning_rate": 0.001903493367142415, "loss": 1.2351, "step": 5576 }, { "epoch": 0.48905702568683895, "grad_norm": 0.06201171875, "learning_rate": 0.0019030929220097317, "loss": 1.1738, "step": 5577 }, { "epoch": 0.4891447174612135, "grad_norm": 0.051025390625, "learning_rate": 0.0019026924537956266, "loss": 1.1727, "step": 5578 }, { "epoch": 0.4892324092355881, "grad_norm": 0.056396484375, "learning_rate": 0.001902291962536623, "loss": 1.1909, "step": 5579 }, { "epoch": 0.4893201010099626, "grad_norm": 0.054931640625, "learning_rate": 0.0019018914482692433, "loss": 1.2082, "step": 5580 }, { "epoch": 0.48940779278433716, "grad_norm": 0.058837890625, "learning_rate": 0.0019014909110300132, "loss": 1.2708, "step": 5581 }, { "epoch": 0.4894954845587117, "grad_norm": 0.054931640625, "learning_rate": 0.0019010903508554617, "loss": 1.1769, "step": 5582 }, { "epoch": 0.48958317633308623, "grad_norm": 0.06689453125, "learning_rate": 0.0019006897677821182, "loss": 1.2459, "step": 5583 }, { "epoch": 0.4896708681074608, "grad_norm": 0.058837890625, "learning_rate": 0.0019002891618465152, "loss": 1.2269, "step": 5584 }, { "epoch": 0.4897585598818353, "grad_norm": 0.07666015625, "learning_rate": 0.0018998885330851866, "loss": 1.1267, "step": 5585 }, { "epoch": 0.4898462516562099, "grad_norm": 0.05810546875, "learning_rate": 0.0018994878815346695, "loss": 1.2204, "step": 5586 }, { "epoch": 0.48993394343058444, "grad_norm": 0.0888671875, "learning_rate": 0.0018990872072315009, "loss": 1.171, "step": 5587 }, { "epoch": 0.49002163520495895, "grad_norm": 0.0634765625, "learning_rate": 0.0018986865102122226, "loss": 1.1459, "step": 5588 }, { "epoch": 0.4901093269793335, "grad_norm": 0.053955078125, "learning_rate": 0.001898285790513376, "loss": 1.1943, "step": 5589 }, { "epoch": 0.4901970187537081, "grad_norm": 0.109375, "learning_rate": 0.001897885048171507, "loss": 1.1981, "step": 5590 }, { "epoch": 0.4902847105280826, "grad_norm": 0.099609375, "learning_rate": 0.0018974842832231603, "loss": 1.2118, "step": 5591 }, { "epoch": 0.49037240230245716, "grad_norm": 0.080078125, "learning_rate": 0.001897083495704887, "loss": 1.1684, "step": 5592 }, { "epoch": 0.4904600940768317, "grad_norm": 0.08837890625, "learning_rate": 0.001896682685653236, "loss": 1.1585, "step": 5593 }, { "epoch": 0.49054778585120623, "grad_norm": 0.058837890625, "learning_rate": 0.0018962818531047612, "loss": 1.2193, "step": 5594 }, { "epoch": 0.4906354776255808, "grad_norm": 0.095703125, "learning_rate": 0.001895880998096017, "loss": 1.1265, "step": 5595 }, { "epoch": 0.4907231693999553, "grad_norm": 0.059814453125, "learning_rate": 0.0018954801206635607, "loss": 1.227, "step": 5596 }, { "epoch": 0.49081086117432987, "grad_norm": 0.04833984375, "learning_rate": 0.0018950792208439515, "loss": 1.1787, "step": 5597 }, { "epoch": 0.49089855294870444, "grad_norm": 0.076171875, "learning_rate": 0.0018946782986737495, "loss": 1.1929, "step": 5598 }, { "epoch": 0.49098624472307895, "grad_norm": 0.052490234375, "learning_rate": 0.001894277354189518, "loss": 1.2112, "step": 5599 }, { "epoch": 0.4910739364974535, "grad_norm": 0.0673828125, "learning_rate": 0.0018938763874278227, "loss": 1.194, "step": 5600 }, { "epoch": 0.4911616282718281, "grad_norm": 0.064453125, "learning_rate": 0.0018934753984252309, "loss": 1.1987, "step": 5601 }, { "epoch": 0.4912493200462026, "grad_norm": 0.0732421875, "learning_rate": 0.0018930743872183104, "loss": 1.2043, "step": 5602 }, { "epoch": 0.49133701182057715, "grad_norm": 0.0673828125, "learning_rate": 0.001892673353843634, "loss": 1.1922, "step": 5603 }, { "epoch": 0.4914247035949517, "grad_norm": 0.06640625, "learning_rate": 0.0018922722983377737, "loss": 1.1772, "step": 5604 }, { "epoch": 0.4915123953693262, "grad_norm": 0.0966796875, "learning_rate": 0.0018918712207373059, "loss": 1.1816, "step": 5605 }, { "epoch": 0.4916000871437008, "grad_norm": 0.06201171875, "learning_rate": 0.0018914701210788066, "loss": 1.2478, "step": 5606 }, { "epoch": 0.4916877789180753, "grad_norm": 0.09765625, "learning_rate": 0.0018910689993988559, "loss": 1.1837, "step": 5607 }, { "epoch": 0.49177547069244987, "grad_norm": 0.050048828125, "learning_rate": 0.0018906678557340344, "loss": 1.1688, "step": 5608 }, { "epoch": 0.49186316246682443, "grad_norm": 0.054443359375, "learning_rate": 0.0018902666901209257, "loss": 1.1851, "step": 5609 }, { "epoch": 0.49195085424119894, "grad_norm": 0.10546875, "learning_rate": 0.0018898655025961155, "loss": 1.238, "step": 5610 }, { "epoch": 0.4920385460155735, "grad_norm": 0.11181640625, "learning_rate": 0.0018894642931961904, "loss": 1.207, "step": 5611 }, { "epoch": 0.4921262377899481, "grad_norm": 0.1533203125, "learning_rate": 0.0018890630619577402, "loss": 1.2591, "step": 5612 }, { "epoch": 0.4922139295643226, "grad_norm": 0.1357421875, "learning_rate": 0.0018886618089173559, "loss": 1.2329, "step": 5613 }, { "epoch": 0.49230162133869715, "grad_norm": 0.146484375, "learning_rate": 0.0018882605341116305, "loss": 1.2227, "step": 5614 }, { "epoch": 0.4923893131130717, "grad_norm": 0.10107421875, "learning_rate": 0.0018878592375771595, "loss": 1.1316, "step": 5615 }, { "epoch": 0.4924770048874462, "grad_norm": 0.1533203125, "learning_rate": 0.00188745791935054, "loss": 1.2343, "step": 5616 }, { "epoch": 0.4925646966618208, "grad_norm": 0.0478515625, "learning_rate": 0.0018870565794683715, "loss": 1.1185, "step": 5617 }, { "epoch": 0.49265238843619535, "grad_norm": 0.1357421875, "learning_rate": 0.0018866552179672548, "loss": 1.2056, "step": 5618 }, { "epoch": 0.49274008021056986, "grad_norm": 0.09130859375, "learning_rate": 0.001886253834883793, "loss": 1.1655, "step": 5619 }, { "epoch": 0.49282777198494443, "grad_norm": 0.048583984375, "learning_rate": 0.001885852430254591, "loss": 1.162, "step": 5620 }, { "epoch": 0.49291546375931894, "grad_norm": 0.12255859375, "learning_rate": 0.001885451004116257, "loss": 1.2272, "step": 5621 }, { "epoch": 0.4930031555336935, "grad_norm": 0.10302734375, "learning_rate": 0.0018850495565053992, "loss": 1.214, "step": 5622 }, { "epoch": 0.49309084730806807, "grad_norm": 0.07470703125, "learning_rate": 0.001884648087458628, "loss": 1.1889, "step": 5623 }, { "epoch": 0.4931785390824426, "grad_norm": 0.1259765625, "learning_rate": 0.0018842465970125572, "loss": 1.2584, "step": 5624 }, { "epoch": 0.49326623085681714, "grad_norm": 0.051513671875, "learning_rate": 0.0018838450852038018, "loss": 1.1782, "step": 5625 }, { "epoch": 0.4933539226311917, "grad_norm": 0.09228515625, "learning_rate": 0.001883443552068978, "loss": 1.1668, "step": 5626 }, { "epoch": 0.4934416144055662, "grad_norm": 0.10888671875, "learning_rate": 0.0018830419976447047, "loss": 1.2123, "step": 5627 }, { "epoch": 0.4935293061799408, "grad_norm": 0.09716796875, "learning_rate": 0.001882640421967603, "loss": 1.1915, "step": 5628 }, { "epoch": 0.49361699795431535, "grad_norm": 0.11572265625, "learning_rate": 0.0018822388250742954, "loss": 1.2291, "step": 5629 }, { "epoch": 0.49370468972868986, "grad_norm": 0.0537109375, "learning_rate": 0.0018818372070014073, "loss": 1.1561, "step": 5630 }, { "epoch": 0.4937923815030644, "grad_norm": 0.06396484375, "learning_rate": 0.0018814355677855632, "loss": 1.2007, "step": 5631 }, { "epoch": 0.49388007327743894, "grad_norm": 0.07958984375, "learning_rate": 0.0018810339074633931, "loss": 1.2564, "step": 5632 }, { "epoch": 0.4939677650518135, "grad_norm": 0.051025390625, "learning_rate": 0.001880632226071527, "loss": 1.2089, "step": 5633 }, { "epoch": 0.49405545682618807, "grad_norm": 0.09814453125, "learning_rate": 0.0018802305236465973, "loss": 1.2258, "step": 5634 }, { "epoch": 0.4941431486005626, "grad_norm": 0.06494140625, "learning_rate": 0.0018798288002252381, "loss": 1.1608, "step": 5635 }, { "epoch": 0.49423084037493714, "grad_norm": 0.0693359375, "learning_rate": 0.0018794270558440858, "loss": 1.1892, "step": 5636 }, { "epoch": 0.4943185321493117, "grad_norm": 0.054931640625, "learning_rate": 0.001879025290539778, "loss": 1.1637, "step": 5637 }, { "epoch": 0.4944062239236862, "grad_norm": 0.07080078125, "learning_rate": 0.0018786235043489552, "loss": 1.2028, "step": 5638 }, { "epoch": 0.4944939156980608, "grad_norm": 0.052978515625, "learning_rate": 0.0018782216973082593, "loss": 1.2531, "step": 5639 }, { "epoch": 0.49458160747243535, "grad_norm": 0.059814453125, "learning_rate": 0.0018778198694543332, "loss": 1.2358, "step": 5640 }, { "epoch": 0.49466929924680986, "grad_norm": 0.051513671875, "learning_rate": 0.0018774180208238232, "loss": 1.1678, "step": 5641 }, { "epoch": 0.4947569910211844, "grad_norm": 0.051513671875, "learning_rate": 0.0018770161514533772, "loss": 1.2483, "step": 5642 }, { "epoch": 0.49484468279555893, "grad_norm": 0.059814453125, "learning_rate": 0.0018766142613796438, "loss": 1.188, "step": 5643 }, { "epoch": 0.4949323745699335, "grad_norm": 0.064453125, "learning_rate": 0.001876212350639275, "loss": 1.1703, "step": 5644 }, { "epoch": 0.49502006634430806, "grad_norm": 0.05322265625, "learning_rate": 0.0018758104192689235, "loss": 1.2249, "step": 5645 }, { "epoch": 0.4951077581186826, "grad_norm": 0.1083984375, "learning_rate": 0.001875408467305245, "loss": 1.3006, "step": 5646 }, { "epoch": 0.49519544989305714, "grad_norm": 0.06982421875, "learning_rate": 0.001875006494784896, "loss": 1.2372, "step": 5647 }, { "epoch": 0.4952831416674317, "grad_norm": 0.08544921875, "learning_rate": 0.0018746045017445359, "loss": 1.2056, "step": 5648 }, { "epoch": 0.4953708334418062, "grad_norm": 0.1083984375, "learning_rate": 0.0018742024882208242, "loss": 1.1927, "step": 5649 }, { "epoch": 0.4954585252161808, "grad_norm": 0.0615234375, "learning_rate": 0.0018738004542504252, "loss": 1.203, "step": 5650 }, { "epoch": 0.49554621699055534, "grad_norm": 0.125, "learning_rate": 0.001873398399870002, "loss": 1.229, "step": 5651 }, { "epoch": 0.49563390876492985, "grad_norm": 0.055908203125, "learning_rate": 0.0018729963251162217, "loss": 1.1645, "step": 5652 }, { "epoch": 0.4957216005393044, "grad_norm": 0.0673828125, "learning_rate": 0.001872594230025752, "loss": 1.1671, "step": 5653 }, { "epoch": 0.495809292313679, "grad_norm": 0.054931640625, "learning_rate": 0.0018721921146352635, "loss": 1.171, "step": 5654 }, { "epoch": 0.4958969840880535, "grad_norm": 0.05859375, "learning_rate": 0.0018717899789814274, "loss": 1.2281, "step": 5655 }, { "epoch": 0.49598467586242806, "grad_norm": 0.05078125, "learning_rate": 0.0018713878231009177, "loss": 1.2276, "step": 5656 }, { "epoch": 0.49607236763680257, "grad_norm": 0.0654296875, "learning_rate": 0.0018709856470304103, "loss": 1.2074, "step": 5657 }, { "epoch": 0.49616005941117713, "grad_norm": 0.052978515625, "learning_rate": 0.0018705834508065823, "loss": 1.23, "step": 5658 }, { "epoch": 0.4962477511855517, "grad_norm": 0.0810546875, "learning_rate": 0.001870181234466113, "loss": 1.2342, "step": 5659 }, { "epoch": 0.4963354429599262, "grad_norm": 0.06494140625, "learning_rate": 0.0018697789980456833, "loss": 1.2338, "step": 5660 }, { "epoch": 0.4964231347343008, "grad_norm": 0.08837890625, "learning_rate": 0.0018693767415819769, "loss": 1.2406, "step": 5661 }, { "epoch": 0.49651082650867534, "grad_norm": 0.0615234375, "learning_rate": 0.0018689744651116773, "loss": 1.1841, "step": 5662 }, { "epoch": 0.49659851828304985, "grad_norm": 0.072265625, "learning_rate": 0.0018685721686714724, "loss": 1.1813, "step": 5663 }, { "epoch": 0.4966862100574244, "grad_norm": 0.09814453125, "learning_rate": 0.0018681698522980497, "loss": 1.1984, "step": 5664 }, { "epoch": 0.496773901831799, "grad_norm": 0.1220703125, "learning_rate": 0.0018677675160280992, "loss": 1.1984, "step": 5665 }, { "epoch": 0.4968615936061735, "grad_norm": 0.07666015625, "learning_rate": 0.001867365159898314, "loss": 1.1632, "step": 5666 }, { "epoch": 0.49694928538054806, "grad_norm": 0.10888671875, "learning_rate": 0.001866962783945387, "loss": 1.2188, "step": 5667 }, { "epoch": 0.49703697715492257, "grad_norm": 0.05615234375, "learning_rate": 0.0018665603882060144, "loss": 1.1988, "step": 5668 }, { "epoch": 0.49712466892929713, "grad_norm": 0.06396484375, "learning_rate": 0.001866157972716893, "loss": 1.2014, "step": 5669 }, { "epoch": 0.4972123607036717, "grad_norm": 0.12158203125, "learning_rate": 0.0018657555375147232, "loss": 1.204, "step": 5670 }, { "epoch": 0.4973000524780462, "grad_norm": 0.060791015625, "learning_rate": 0.0018653530826362047, "loss": 1.2269, "step": 5671 }, { "epoch": 0.49738774425242077, "grad_norm": 0.09765625, "learning_rate": 0.0018649506081180418, "loss": 1.1729, "step": 5672 }, { "epoch": 0.49747543602679534, "grad_norm": 0.08056640625, "learning_rate": 0.0018645481139969377, "loss": 1.1768, "step": 5673 }, { "epoch": 0.49756312780116985, "grad_norm": 0.0703125, "learning_rate": 0.0018641456003096002, "loss": 1.1986, "step": 5674 }, { "epoch": 0.4976508195755444, "grad_norm": 0.09423828125, "learning_rate": 0.001863743067092736, "loss": 1.2033, "step": 5675 }, { "epoch": 0.497738511349919, "grad_norm": 0.0634765625, "learning_rate": 0.0018633405143830557, "loss": 1.1564, "step": 5676 }, { "epoch": 0.4978262031242935, "grad_norm": 0.049560546875, "learning_rate": 0.0018629379422172715, "loss": 1.2138, "step": 5677 }, { "epoch": 0.49791389489866805, "grad_norm": 0.056884765625, "learning_rate": 0.0018625353506320967, "loss": 1.1433, "step": 5678 }, { "epoch": 0.49800158667304256, "grad_norm": 0.05322265625, "learning_rate": 0.0018621327396642468, "loss": 1.227, "step": 5679 }, { "epoch": 0.4980892784474171, "grad_norm": 0.053466796875, "learning_rate": 0.001861730109350438, "loss": 1.2443, "step": 5680 }, { "epoch": 0.4981769702217917, "grad_norm": 0.0634765625, "learning_rate": 0.0018613274597273912, "loss": 1.2763, "step": 5681 }, { "epoch": 0.4982646619961662, "grad_norm": 0.05712890625, "learning_rate": 0.0018609247908318244, "loss": 1.1643, "step": 5682 }, { "epoch": 0.49835235377054077, "grad_norm": 0.07666015625, "learning_rate": 0.0018605221027004615, "loss": 1.2103, "step": 5683 }, { "epoch": 0.49844004554491533, "grad_norm": 0.052490234375, "learning_rate": 0.0018601193953700261, "loss": 1.1926, "step": 5684 }, { "epoch": 0.49852773731928984, "grad_norm": 0.06640625, "learning_rate": 0.0018597166688772446, "loss": 1.1503, "step": 5685 }, { "epoch": 0.4986154290936644, "grad_norm": 0.05126953125, "learning_rate": 0.001859313923258844, "loss": 1.2024, "step": 5686 }, { "epoch": 0.498703120868039, "grad_norm": 0.07080078125, "learning_rate": 0.0018589111585515538, "loss": 1.2137, "step": 5687 }, { "epoch": 0.4987908126424135, "grad_norm": 0.053955078125, "learning_rate": 0.0018585083747921056, "loss": 1.1788, "step": 5688 }, { "epoch": 0.49887850441678805, "grad_norm": 0.0537109375, "learning_rate": 0.0018581055720172318, "loss": 1.1476, "step": 5689 }, { "epoch": 0.4989661961911626, "grad_norm": 0.057861328125, "learning_rate": 0.001857702750263667, "loss": 1.1392, "step": 5690 }, { "epoch": 0.4990538879655371, "grad_norm": 0.056640625, "learning_rate": 0.0018572999095681476, "loss": 1.2465, "step": 5691 }, { "epoch": 0.4991415797399117, "grad_norm": 0.060546875, "learning_rate": 0.0018568970499674119, "loss": 1.2366, "step": 5692 }, { "epoch": 0.4992292715142862, "grad_norm": 0.060546875, "learning_rate": 0.0018564941714981987, "loss": 1.1947, "step": 5693 }, { "epoch": 0.49931696328866076, "grad_norm": 0.06494140625, "learning_rate": 0.0018560912741972504, "loss": 1.2802, "step": 5694 }, { "epoch": 0.49940465506303533, "grad_norm": 0.07177734375, "learning_rate": 0.0018556883581013098, "loss": 1.2426, "step": 5695 }, { "epoch": 0.49949234683740984, "grad_norm": 0.1396484375, "learning_rate": 0.0018552854232471222, "loss": 1.2008, "step": 5696 }, { "epoch": 0.4995800386117844, "grad_norm": 0.05810546875, "learning_rate": 0.0018548824696714334, "loss": 1.183, "step": 5697 }, { "epoch": 0.49966773038615897, "grad_norm": 0.12353515625, "learning_rate": 0.001854479497410993, "loss": 1.2393, "step": 5698 }, { "epoch": 0.4997554221605335, "grad_norm": 0.11279296875, "learning_rate": 0.0018540765065025498, "loss": 1.216, "step": 5699 }, { "epoch": 0.49984311393490805, "grad_norm": 0.11181640625, "learning_rate": 0.001853673496982856, "loss": 1.213, "step": 5700 }, { "epoch": 0.4999308057092826, "grad_norm": 0.12890625, "learning_rate": 0.0018532704688886657, "loss": 1.1997, "step": 5701 }, { "epoch": 0.5000184974836571, "grad_norm": 0.062255859375, "learning_rate": 0.0018528674222567328, "loss": 1.2696, "step": 5702 }, { "epoch": 0.5001061892580316, "grad_norm": 0.11083984375, "learning_rate": 0.001852464357123815, "loss": 1.2465, "step": 5703 }, { "epoch": 0.5001938810324063, "grad_norm": 0.08251953125, "learning_rate": 0.0018520612735266702, "loss": 1.1833, "step": 5704 }, { "epoch": 0.5002815728067808, "grad_norm": 0.078125, "learning_rate": 0.001851658171502059, "loss": 1.1356, "step": 5705 }, { "epoch": 0.5003692645811553, "grad_norm": 0.11767578125, "learning_rate": 0.0018512550510867431, "loss": 1.1625, "step": 5706 }, { "epoch": 0.5004569563555299, "grad_norm": 0.05908203125, "learning_rate": 0.001850851912317486, "loss": 1.1695, "step": 5707 }, { "epoch": 0.5005446481299044, "grad_norm": 0.0478515625, "learning_rate": 0.0018504487552310535, "loss": 1.15, "step": 5708 }, { "epoch": 0.5006323399042789, "grad_norm": 0.08642578125, "learning_rate": 0.0018500455798642114, "loss": 1.2324, "step": 5709 }, { "epoch": 0.5007200316786535, "grad_norm": 0.05517578125, "learning_rate": 0.001849642386253729, "loss": 1.2476, "step": 5710 }, { "epoch": 0.500807723453028, "grad_norm": 0.0478515625, "learning_rate": 0.001849239174436376, "loss": 1.1666, "step": 5711 }, { "epoch": 0.5008954152274026, "grad_norm": 0.0517578125, "learning_rate": 0.0018488359444489252, "loss": 1.2142, "step": 5712 }, { "epoch": 0.5009831070017772, "grad_norm": 0.05615234375, "learning_rate": 0.0018484326963281493, "loss": 1.2281, "step": 5713 }, { "epoch": 0.5010707987761517, "grad_norm": 0.058837890625, "learning_rate": 0.001848029430110824, "loss": 1.1742, "step": 5714 }, { "epoch": 0.5011584905505262, "grad_norm": 0.054931640625, "learning_rate": 0.0018476261458337255, "loss": 1.1846, "step": 5715 }, { "epoch": 0.5012461823249008, "grad_norm": 0.0634765625, "learning_rate": 0.0018472228435336328, "loss": 1.1571, "step": 5716 }, { "epoch": 0.5013338740992753, "grad_norm": 0.062255859375, "learning_rate": 0.0018468195232473263, "loss": 1.1575, "step": 5717 }, { "epoch": 0.5014215658736498, "grad_norm": 0.06298828125, "learning_rate": 0.0018464161850115864, "loss": 1.1402, "step": 5718 }, { "epoch": 0.5015092576480245, "grad_norm": 0.10107421875, "learning_rate": 0.0018460128288631983, "loss": 1.2355, "step": 5719 }, { "epoch": 0.501596949422399, "grad_norm": 0.0654296875, "learning_rate": 0.001845609454838946, "loss": 1.1909, "step": 5720 }, { "epoch": 0.5016846411967735, "grad_norm": 0.10595703125, "learning_rate": 0.0018452060629756165, "loss": 1.1982, "step": 5721 }, { "epoch": 0.501772332971148, "grad_norm": 0.07763671875, "learning_rate": 0.0018448026533099975, "loss": 1.2584, "step": 5722 }, { "epoch": 0.5018600247455226, "grad_norm": 0.0947265625, "learning_rate": 0.0018443992258788805, "loss": 1.2067, "step": 5723 }, { "epoch": 0.5019477165198971, "grad_norm": 0.0537109375, "learning_rate": 0.001843995780719055, "loss": 1.1983, "step": 5724 }, { "epoch": 0.5020354082942716, "grad_norm": 0.1025390625, "learning_rate": 0.0018435923178673152, "loss": 1.2062, "step": 5725 }, { "epoch": 0.5021231000686462, "grad_norm": 0.06689453125, "learning_rate": 0.0018431888373604559, "loss": 1.2658, "step": 5726 }, { "epoch": 0.5022107918430208, "grad_norm": 0.05322265625, "learning_rate": 0.0018427853392352731, "loss": 1.1904, "step": 5727 }, { "epoch": 0.5022984836173953, "grad_norm": 0.064453125, "learning_rate": 0.0018423818235285654, "loss": 1.2019, "step": 5728 }, { "epoch": 0.5023861753917699, "grad_norm": 0.05615234375, "learning_rate": 0.0018419782902771315, "loss": 1.1574, "step": 5729 }, { "epoch": 0.5024738671661444, "grad_norm": 0.0615234375, "learning_rate": 0.0018415747395177737, "loss": 1.2041, "step": 5730 }, { "epoch": 0.5025615589405189, "grad_norm": 0.055908203125, "learning_rate": 0.0018411711712872938, "loss": 1.2191, "step": 5731 }, { "epoch": 0.5026492507148935, "grad_norm": 0.061767578125, "learning_rate": 0.001840767585622497, "loss": 1.1694, "step": 5732 }, { "epoch": 0.502736942489268, "grad_norm": 0.060302734375, "learning_rate": 0.0018403639825601886, "loss": 1.2089, "step": 5733 }, { "epoch": 0.5028246342636425, "grad_norm": 0.06591796875, "learning_rate": 0.0018399603621371764, "loss": 1.1681, "step": 5734 }, { "epoch": 0.5029123260380172, "grad_norm": 0.055908203125, "learning_rate": 0.0018395567243902696, "loss": 1.2109, "step": 5735 }, { "epoch": 0.5030000178123917, "grad_norm": 0.0712890625, "learning_rate": 0.0018391530693562788, "loss": 1.1286, "step": 5736 }, { "epoch": 0.5030877095867662, "grad_norm": 0.1064453125, "learning_rate": 0.0018387493970720165, "loss": 1.2475, "step": 5737 }, { "epoch": 0.5031754013611408, "grad_norm": 0.08447265625, "learning_rate": 0.0018383457075742962, "loss": 1.1675, "step": 5738 }, { "epoch": 0.5032630931355153, "grad_norm": 0.10205078125, "learning_rate": 0.0018379420008999337, "loss": 1.1428, "step": 5739 }, { "epoch": 0.5033507849098898, "grad_norm": 0.05810546875, "learning_rate": 0.0018375382770857462, "loss": 1.252, "step": 5740 }, { "epoch": 0.5034384766842644, "grad_norm": 0.072265625, "learning_rate": 0.0018371345361685514, "loss": 1.2008, "step": 5741 }, { "epoch": 0.503526168458639, "grad_norm": 0.0556640625, "learning_rate": 0.0018367307781851701, "loss": 1.1536, "step": 5742 }, { "epoch": 0.5036138602330135, "grad_norm": 0.052978515625, "learning_rate": 0.0018363270031724244, "loss": 1.2095, "step": 5743 }, { "epoch": 0.5037015520073881, "grad_norm": 0.056640625, "learning_rate": 0.0018359232111671367, "loss": 1.2011, "step": 5744 }, { "epoch": 0.5037892437817626, "grad_norm": 0.068359375, "learning_rate": 0.0018355194022061322, "loss": 1.1878, "step": 5745 }, { "epoch": 0.5038769355561371, "grad_norm": 0.0615234375, "learning_rate": 0.0018351155763262365, "loss": 1.2032, "step": 5746 }, { "epoch": 0.5039646273305116, "grad_norm": 0.06494140625, "learning_rate": 0.001834711733564279, "loss": 1.1895, "step": 5747 }, { "epoch": 0.5040523191048862, "grad_norm": 0.07373046875, "learning_rate": 0.0018343078739570876, "loss": 1.1981, "step": 5748 }, { "epoch": 0.5041400108792607, "grad_norm": 0.07177734375, "learning_rate": 0.001833903997541494, "loss": 1.2627, "step": 5749 }, { "epoch": 0.5042277026536353, "grad_norm": 0.07666015625, "learning_rate": 0.0018335001043543312, "loss": 1.2286, "step": 5750 }, { "epoch": 0.5043153944280099, "grad_norm": 0.0810546875, "learning_rate": 0.0018330961944324315, "loss": 1.2236, "step": 5751 }, { "epoch": 0.5044030862023844, "grad_norm": 0.08642578125, "learning_rate": 0.0018326922678126322, "loss": 1.1926, "step": 5752 }, { "epoch": 0.5044907779767589, "grad_norm": 0.06494140625, "learning_rate": 0.0018322883245317696, "loss": 1.2759, "step": 5753 }, { "epoch": 0.5045784697511335, "grad_norm": 0.07958984375, "learning_rate": 0.0018318843646266823, "loss": 1.2586, "step": 5754 }, { "epoch": 0.504666161525508, "grad_norm": 0.078125, "learning_rate": 0.0018314803881342104, "loss": 1.1861, "step": 5755 }, { "epoch": 0.5047538532998825, "grad_norm": 0.06689453125, "learning_rate": 0.0018310763950911964, "loss": 1.1615, "step": 5756 }, { "epoch": 0.5048415450742572, "grad_norm": 0.06689453125, "learning_rate": 0.0018306723855344817, "loss": 1.2668, "step": 5757 }, { "epoch": 0.5049292368486317, "grad_norm": 0.0654296875, "learning_rate": 0.0018302683595009122, "loss": 1.1961, "step": 5758 }, { "epoch": 0.5050169286230062, "grad_norm": 0.058837890625, "learning_rate": 0.0018298643170273333, "loss": 1.1906, "step": 5759 }, { "epoch": 0.5051046203973808, "grad_norm": 0.09521484375, "learning_rate": 0.0018294602581505933, "loss": 1.2408, "step": 5760 }, { "epoch": 0.5051923121717553, "grad_norm": 0.06396484375, "learning_rate": 0.0018290561829075415, "loss": 1.2697, "step": 5761 }, { "epoch": 0.5052800039461298, "grad_norm": 0.1533203125, "learning_rate": 0.0018286520913350273, "loss": 1.202, "step": 5762 }, { "epoch": 0.5053676957205044, "grad_norm": 0.06494140625, "learning_rate": 0.001828247983469904, "loss": 1.2138, "step": 5763 }, { "epoch": 0.505455387494879, "grad_norm": 0.1162109375, "learning_rate": 0.0018278438593490238, "loss": 1.289, "step": 5764 }, { "epoch": 0.5055430792692535, "grad_norm": 0.0849609375, "learning_rate": 0.0018274397190092438, "loss": 1.216, "step": 5765 }, { "epoch": 0.5056307710436281, "grad_norm": 0.053955078125, "learning_rate": 0.0018270355624874196, "loss": 1.2376, "step": 5766 }, { "epoch": 0.5057184628180026, "grad_norm": 0.0830078125, "learning_rate": 0.001826631389820409, "loss": 1.2106, "step": 5767 }, { "epoch": 0.5058061545923771, "grad_norm": 0.07373046875, "learning_rate": 0.0018262272010450707, "loss": 1.2127, "step": 5768 }, { "epoch": 0.5058938463667516, "grad_norm": 0.05078125, "learning_rate": 0.0018258229961982675, "loss": 1.19, "step": 5769 }, { "epoch": 0.5059815381411262, "grad_norm": 0.083984375, "learning_rate": 0.0018254187753168605, "loss": 1.2626, "step": 5770 }, { "epoch": 0.5060692299155007, "grad_norm": 0.054443359375, "learning_rate": 0.001825014538437714, "loss": 1.1905, "step": 5771 }, { "epoch": 0.5061569216898752, "grad_norm": 0.052734375, "learning_rate": 0.0018246102855976934, "loss": 1.2319, "step": 5772 }, { "epoch": 0.5062446134642499, "grad_norm": 0.050537109375, "learning_rate": 0.0018242060168336656, "loss": 1.1685, "step": 5773 }, { "epoch": 0.5063323052386244, "grad_norm": 0.0556640625, "learning_rate": 0.0018238017321824994, "loss": 1.2272, "step": 5774 }, { "epoch": 0.5064199970129989, "grad_norm": 0.0498046875, "learning_rate": 0.0018233974316810631, "loss": 1.1756, "step": 5775 }, { "epoch": 0.5065076887873735, "grad_norm": 0.080078125, "learning_rate": 0.0018229931153662293, "loss": 1.2088, "step": 5776 }, { "epoch": 0.506595380561748, "grad_norm": 0.05517578125, "learning_rate": 0.0018225887832748694, "loss": 1.2268, "step": 5777 }, { "epoch": 0.5066830723361225, "grad_norm": 0.057861328125, "learning_rate": 0.0018221844354438582, "loss": 1.2466, "step": 5778 }, { "epoch": 0.5067707641104972, "grad_norm": 0.056884765625, "learning_rate": 0.0018217800719100706, "loss": 1.2156, "step": 5779 }, { "epoch": 0.5068584558848717, "grad_norm": 0.055419921875, "learning_rate": 0.0018213756927103846, "loss": 1.1758, "step": 5780 }, { "epoch": 0.5069461476592462, "grad_norm": 0.05126953125, "learning_rate": 0.0018209712978816772, "loss": 1.1806, "step": 5781 }, { "epoch": 0.5070338394336208, "grad_norm": 0.04833984375, "learning_rate": 0.0018205668874608288, "loss": 1.212, "step": 5782 }, { "epoch": 0.5071215312079953, "grad_norm": 0.0615234375, "learning_rate": 0.0018201624614847213, "loss": 1.2289, "step": 5783 }, { "epoch": 0.5072092229823698, "grad_norm": 0.051025390625, "learning_rate": 0.0018197580199902361, "loss": 1.1982, "step": 5784 }, { "epoch": 0.5072969147567444, "grad_norm": 0.05078125, "learning_rate": 0.0018193535630142577, "loss": 1.1555, "step": 5785 }, { "epoch": 0.5073846065311189, "grad_norm": 0.061279296875, "learning_rate": 0.0018189490905936713, "loss": 1.204, "step": 5786 }, { "epoch": 0.5074722983054935, "grad_norm": 0.05078125, "learning_rate": 0.0018185446027653644, "loss": 1.216, "step": 5787 }, { "epoch": 0.5075599900798681, "grad_norm": 0.056396484375, "learning_rate": 0.0018181400995662244, "loss": 1.1803, "step": 5788 }, { "epoch": 0.5076476818542426, "grad_norm": 0.0595703125, "learning_rate": 0.0018177355810331417, "loss": 1.1111, "step": 5789 }, { "epoch": 0.5077353736286171, "grad_norm": 0.050048828125, "learning_rate": 0.0018173310472030068, "loss": 1.2101, "step": 5790 }, { "epoch": 0.5078230654029917, "grad_norm": 0.0634765625, "learning_rate": 0.001816926498112713, "loss": 1.2231, "step": 5791 }, { "epoch": 0.5079107571773662, "grad_norm": 0.0810546875, "learning_rate": 0.0018165219337991528, "loss": 1.2315, "step": 5792 }, { "epoch": 0.5079984489517407, "grad_norm": 0.08837890625, "learning_rate": 0.0018161173542992222, "loss": 1.2744, "step": 5793 }, { "epoch": 0.5080861407261152, "grad_norm": 0.0615234375, "learning_rate": 0.0018157127596498182, "loss": 1.1489, "step": 5794 }, { "epoch": 0.5081738325004899, "grad_norm": 0.109375, "learning_rate": 0.0018153081498878375, "loss": 1.1834, "step": 5795 }, { "epoch": 0.5082615242748644, "grad_norm": 0.054931640625, "learning_rate": 0.0018149035250501807, "loss": 1.1888, "step": 5796 }, { "epoch": 0.5083492160492389, "grad_norm": 0.109375, "learning_rate": 0.001814498885173748, "loss": 1.2029, "step": 5797 }, { "epoch": 0.5084369078236135, "grad_norm": 0.06396484375, "learning_rate": 0.001814094230295442, "loss": 1.1493, "step": 5798 }, { "epoch": 0.508524599597988, "grad_norm": 0.1591796875, "learning_rate": 0.0018136895604521656, "loss": 1.2295, "step": 5799 }, { "epoch": 0.5086122913723625, "grad_norm": 0.07373046875, "learning_rate": 0.0018132848756808237, "loss": 1.145, "step": 5800 }, { "epoch": 0.5086999831467371, "grad_norm": 0.1357421875, "learning_rate": 0.001812880176018323, "loss": 1.2434, "step": 5801 }, { "epoch": 0.5087876749211117, "grad_norm": 0.0615234375, "learning_rate": 0.0018124754615015705, "loss": 1.1749, "step": 5802 }, { "epoch": 0.5088753666954862, "grad_norm": 0.07275390625, "learning_rate": 0.0018120707321674751, "loss": 1.1857, "step": 5803 }, { "epoch": 0.5089630584698608, "grad_norm": 0.09375, "learning_rate": 0.0018116659880529472, "loss": 1.2313, "step": 5804 }, { "epoch": 0.5090507502442353, "grad_norm": 0.051025390625, "learning_rate": 0.0018112612291948994, "loss": 1.1616, "step": 5805 }, { "epoch": 0.5091384420186098, "grad_norm": 0.0810546875, "learning_rate": 0.0018108564556302425, "loss": 1.1898, "step": 5806 }, { "epoch": 0.5092261337929844, "grad_norm": 0.0712890625, "learning_rate": 0.0018104516673958931, "loss": 1.1804, "step": 5807 }, { "epoch": 0.5093138255673589, "grad_norm": 0.0625, "learning_rate": 0.001810046864528766, "loss": 1.1795, "step": 5808 }, { "epoch": 0.5094015173417334, "grad_norm": 0.060546875, "learning_rate": 0.0018096420470657777, "loss": 1.217, "step": 5809 }, { "epoch": 0.5094892091161081, "grad_norm": 0.052490234375, "learning_rate": 0.0018092372150438464, "loss": 1.2501, "step": 5810 }, { "epoch": 0.5095769008904826, "grad_norm": 0.07177734375, "learning_rate": 0.001808832368499892, "loss": 1.1964, "step": 5811 }, { "epoch": 0.5096645926648571, "grad_norm": 0.119140625, "learning_rate": 0.0018084275074708364, "loss": 1.2247, "step": 5812 }, { "epoch": 0.5097522844392317, "grad_norm": 0.07958984375, "learning_rate": 0.0018080226319936005, "loss": 1.1531, "step": 5813 }, { "epoch": 0.5098399762136062, "grad_norm": 0.146484375, "learning_rate": 0.0018076177421051088, "loss": 1.1754, "step": 5814 }, { "epoch": 0.5099276679879807, "grad_norm": 0.0615234375, "learning_rate": 0.001807212837842286, "loss": 1.2848, "step": 5815 }, { "epoch": 0.5100153597623552, "grad_norm": 0.10888671875, "learning_rate": 0.001806807919242058, "loss": 1.2323, "step": 5816 }, { "epoch": 0.5101030515367299, "grad_norm": 0.05712890625, "learning_rate": 0.0018064029863413527, "loss": 1.1981, "step": 5817 }, { "epoch": 0.5101907433111044, "grad_norm": 0.08935546875, "learning_rate": 0.0018059980391770993, "loss": 1.1402, "step": 5818 }, { "epoch": 0.5102784350854789, "grad_norm": 0.07275390625, "learning_rate": 0.0018055930777862265, "loss": 1.1969, "step": 5819 }, { "epoch": 0.5103661268598535, "grad_norm": 0.056884765625, "learning_rate": 0.0018051881022056672, "loss": 1.1924, "step": 5820 }, { "epoch": 0.510453818634228, "grad_norm": 0.08349609375, "learning_rate": 0.0018047831124723534, "loss": 1.1866, "step": 5821 }, { "epoch": 0.5105415104086025, "grad_norm": 0.054931640625, "learning_rate": 0.0018043781086232195, "loss": 1.2312, "step": 5822 }, { "epoch": 0.5106292021829771, "grad_norm": 0.056884765625, "learning_rate": 0.0018039730906952004, "loss": 1.2035, "step": 5823 }, { "epoch": 0.5107168939573516, "grad_norm": 0.07958984375, "learning_rate": 0.0018035680587252328, "loss": 1.2893, "step": 5824 }, { "epoch": 0.5108045857317262, "grad_norm": 0.056884765625, "learning_rate": 0.0018031630127502553, "loss": 1.1707, "step": 5825 }, { "epoch": 0.5108922775061008, "grad_norm": 0.10205078125, "learning_rate": 0.0018027579528072058, "loss": 1.1873, "step": 5826 }, { "epoch": 0.5109799692804753, "grad_norm": 0.055419921875, "learning_rate": 0.0018023528789330257, "loss": 1.2429, "step": 5827 }, { "epoch": 0.5110676610548498, "grad_norm": 0.11767578125, "learning_rate": 0.0018019477911646558, "loss": 1.211, "step": 5828 }, { "epoch": 0.5111553528292244, "grad_norm": 0.0947265625, "learning_rate": 0.0018015426895390402, "loss": 1.2353, "step": 5829 }, { "epoch": 0.5112430446035989, "grad_norm": 0.0927734375, "learning_rate": 0.001801137574093122, "loss": 1.2151, "step": 5830 }, { "epoch": 0.5113307363779734, "grad_norm": 0.1044921875, "learning_rate": 0.0018007324448638482, "loss": 1.1721, "step": 5831 }, { "epoch": 0.5114184281523481, "grad_norm": 0.052001953125, "learning_rate": 0.0018003273018881631, "loss": 1.2858, "step": 5832 }, { "epoch": 0.5115061199267226, "grad_norm": 0.09375, "learning_rate": 0.0017999221452030174, "loss": 1.2635, "step": 5833 }, { "epoch": 0.5115938117010971, "grad_norm": 0.07763671875, "learning_rate": 0.0017995169748453584, "loss": 1.1876, "step": 5834 }, { "epoch": 0.5116815034754717, "grad_norm": 0.060546875, "learning_rate": 0.0017991117908521374, "loss": 1.2126, "step": 5835 }, { "epoch": 0.5117691952498462, "grad_norm": 0.126953125, "learning_rate": 0.0017987065932603065, "loss": 1.1528, "step": 5836 }, { "epoch": 0.5118568870242207, "grad_norm": 0.11328125, "learning_rate": 0.0017983013821068175, "loss": 1.2313, "step": 5837 }, { "epoch": 0.5119445787985953, "grad_norm": 0.1259765625, "learning_rate": 0.0017978961574286255, "loss": 1.2653, "step": 5838 }, { "epoch": 0.5120322705729698, "grad_norm": 0.15234375, "learning_rate": 0.0017974909192626858, "loss": 1.2775, "step": 5839 }, { "epoch": 0.5121199623473444, "grad_norm": 0.0908203125, "learning_rate": 0.0017970856676459547, "loss": 1.22, "step": 5840 }, { "epoch": 0.5122076541217189, "grad_norm": 0.09375, "learning_rate": 0.0017966804026153906, "loss": 1.1973, "step": 5841 }, { "epoch": 0.5122953458960935, "grad_norm": 0.0703125, "learning_rate": 0.001796275124207953, "loss": 1.235, "step": 5842 }, { "epoch": 0.512383037670468, "grad_norm": 0.06201171875, "learning_rate": 0.001795869832460601, "loss": 1.2421, "step": 5843 }, { "epoch": 0.5124707294448425, "grad_norm": 0.126953125, "learning_rate": 0.001795464527410297, "loss": 1.1367, "step": 5844 }, { "epoch": 0.5125584212192171, "grad_norm": 0.0634765625, "learning_rate": 0.0017950592090940035, "loss": 1.1823, "step": 5845 }, { "epoch": 0.5126461129935916, "grad_norm": 0.076171875, "learning_rate": 0.0017946538775486848, "loss": 1.2318, "step": 5846 }, { "epoch": 0.5127338047679662, "grad_norm": 0.0625, "learning_rate": 0.0017942485328113057, "loss": 1.2445, "step": 5847 }, { "epoch": 0.5128214965423408, "grad_norm": 0.0498046875, "learning_rate": 0.0017938431749188327, "loss": 1.1649, "step": 5848 }, { "epoch": 0.5129091883167153, "grad_norm": 0.06884765625, "learning_rate": 0.0017934378039082342, "loss": 1.2243, "step": 5849 }, { "epoch": 0.5129968800910898, "grad_norm": 0.052001953125, "learning_rate": 0.0017930324198164775, "loss": 1.1657, "step": 5850 }, { "epoch": 0.5130845718654644, "grad_norm": 0.052734375, "learning_rate": 0.0017926270226805334, "loss": 1.1584, "step": 5851 }, { "epoch": 0.5131722636398389, "grad_norm": 0.052490234375, "learning_rate": 0.0017922216125373733, "loss": 1.2031, "step": 5852 }, { "epoch": 0.5132599554142134, "grad_norm": 0.06298828125, "learning_rate": 0.0017918161894239692, "loss": 1.1273, "step": 5853 }, { "epoch": 0.513347647188588, "grad_norm": 0.054443359375, "learning_rate": 0.0017914107533772948, "loss": 1.2437, "step": 5854 }, { "epoch": 0.5134353389629626, "grad_norm": 0.0537109375, "learning_rate": 0.0017910053044343248, "loss": 1.1618, "step": 5855 }, { "epoch": 0.5135230307373371, "grad_norm": 0.056396484375, "learning_rate": 0.001790599842632035, "loss": 1.2116, "step": 5856 }, { "epoch": 0.5136107225117117, "grad_norm": 0.0576171875, "learning_rate": 0.001790194368007402, "loss": 1.2689, "step": 5857 }, { "epoch": 0.5136984142860862, "grad_norm": 0.05810546875, "learning_rate": 0.0017897888805974052, "loss": 1.2372, "step": 5858 }, { "epoch": 0.5137861060604607, "grad_norm": 0.051513671875, "learning_rate": 0.0017893833804390235, "loss": 1.1832, "step": 5859 }, { "epoch": 0.5138737978348353, "grad_norm": 0.059326171875, "learning_rate": 0.0017889778675692367, "loss": 1.2283, "step": 5860 }, { "epoch": 0.5139614896092098, "grad_norm": 0.07080078125, "learning_rate": 0.0017885723420250272, "loss": 1.1838, "step": 5861 }, { "epoch": 0.5140491813835844, "grad_norm": 0.06787109375, "learning_rate": 0.0017881668038433785, "loss": 1.1976, "step": 5862 }, { "epoch": 0.514136873157959, "grad_norm": 0.057373046875, "learning_rate": 0.0017877612530612732, "loss": 1.2371, "step": 5863 }, { "epoch": 0.5142245649323335, "grad_norm": 0.091796875, "learning_rate": 0.001787355689715697, "loss": 1.2485, "step": 5864 }, { "epoch": 0.514312256706708, "grad_norm": 0.061279296875, "learning_rate": 0.0017869501138436372, "loss": 1.1813, "step": 5865 }, { "epoch": 0.5143999484810825, "grad_norm": 0.061279296875, "learning_rate": 0.00178654452548208, "loss": 1.2022, "step": 5866 }, { "epoch": 0.5144876402554571, "grad_norm": 0.0771484375, "learning_rate": 0.0017861389246680156, "loss": 1.1106, "step": 5867 }, { "epoch": 0.5145753320298316, "grad_norm": 0.05078125, "learning_rate": 0.001785733311438432, "loss": 1.2337, "step": 5868 }, { "epoch": 0.5146630238042061, "grad_norm": 0.0654296875, "learning_rate": 0.0017853276858303209, "loss": 1.2172, "step": 5869 }, { "epoch": 0.5147507155785808, "grad_norm": 0.09423828125, "learning_rate": 0.0017849220478806738, "loss": 1.1743, "step": 5870 }, { "epoch": 0.5148384073529553, "grad_norm": 0.07861328125, "learning_rate": 0.0017845163976264846, "loss": 1.2035, "step": 5871 }, { "epoch": 0.5149260991273298, "grad_norm": 0.050537109375, "learning_rate": 0.0017841107351047472, "loss": 1.1845, "step": 5872 }, { "epoch": 0.5150137909017044, "grad_norm": 0.04931640625, "learning_rate": 0.0017837050603524568, "loss": 1.2109, "step": 5873 }, { "epoch": 0.5151014826760789, "grad_norm": 0.0712890625, "learning_rate": 0.0017832993734066102, "loss": 1.2439, "step": 5874 }, { "epoch": 0.5151891744504534, "grad_norm": 0.0849609375, "learning_rate": 0.001782893674304205, "loss": 1.2171, "step": 5875 }, { "epoch": 0.515276866224828, "grad_norm": 0.09375, "learning_rate": 0.0017824879630822397, "loss": 1.22, "step": 5876 }, { "epoch": 0.5153645579992026, "grad_norm": 0.07177734375, "learning_rate": 0.0017820822397777137, "loss": 1.1902, "step": 5877 }, { "epoch": 0.5154522497735771, "grad_norm": 0.0693359375, "learning_rate": 0.001781676504427629, "loss": 1.1781, "step": 5878 }, { "epoch": 0.5155399415479517, "grad_norm": 0.056396484375, "learning_rate": 0.0017812707570689867, "loss": 1.1947, "step": 5879 }, { "epoch": 0.5156276333223262, "grad_norm": 0.0693359375, "learning_rate": 0.0017808649977387903, "loss": 1.2399, "step": 5880 }, { "epoch": 0.5157153250967007, "grad_norm": 0.0556640625, "learning_rate": 0.0017804592264740438, "loss": 1.2499, "step": 5881 }, { "epoch": 0.5158030168710753, "grad_norm": 0.048828125, "learning_rate": 0.001780053443311753, "loss": 1.2116, "step": 5882 }, { "epoch": 0.5158907086454498, "grad_norm": 0.06640625, "learning_rate": 0.0017796476482889233, "loss": 1.2481, "step": 5883 }, { "epoch": 0.5159784004198243, "grad_norm": 0.07421875, "learning_rate": 0.0017792418414425634, "loss": 1.1495, "step": 5884 }, { "epoch": 0.516066092194199, "grad_norm": 0.052978515625, "learning_rate": 0.0017788360228096812, "loss": 1.1576, "step": 5885 }, { "epoch": 0.5161537839685735, "grad_norm": 0.054931640625, "learning_rate": 0.0017784301924272856, "loss": 1.1936, "step": 5886 }, { "epoch": 0.516241475742948, "grad_norm": 0.057373046875, "learning_rate": 0.0017780243503323888, "loss": 1.2703, "step": 5887 }, { "epoch": 0.5163291675173225, "grad_norm": 0.076171875, "learning_rate": 0.001777618496562001, "loss": 1.1739, "step": 5888 }, { "epoch": 0.5164168592916971, "grad_norm": 0.1103515625, "learning_rate": 0.0017772126311531363, "loss": 1.2081, "step": 5889 }, { "epoch": 0.5165045510660716, "grad_norm": 0.1259765625, "learning_rate": 0.0017768067541428078, "loss": 1.2648, "step": 5890 }, { "epoch": 0.5165922428404461, "grad_norm": 0.07080078125, "learning_rate": 0.001776400865568031, "loss": 1.1837, "step": 5891 }, { "epoch": 0.5166799346148208, "grad_norm": 0.052978515625, "learning_rate": 0.0017759949654658207, "loss": 1.2042, "step": 5892 }, { "epoch": 0.5167676263891953, "grad_norm": 0.058837890625, "learning_rate": 0.0017755890538731958, "loss": 1.1853, "step": 5893 }, { "epoch": 0.5168553181635698, "grad_norm": 0.05224609375, "learning_rate": 0.0017751831308271732, "loss": 1.2413, "step": 5894 }, { "epoch": 0.5169430099379444, "grad_norm": 0.047607421875, "learning_rate": 0.0017747771963647716, "loss": 1.1754, "step": 5895 }, { "epoch": 0.5170307017123189, "grad_norm": 0.072265625, "learning_rate": 0.0017743712505230123, "loss": 1.2563, "step": 5896 }, { "epoch": 0.5171183934866934, "grad_norm": 0.052001953125, "learning_rate": 0.0017739652933389158, "loss": 1.1686, "step": 5897 }, { "epoch": 0.517206085261068, "grad_norm": 0.0556640625, "learning_rate": 0.0017735593248495048, "loss": 1.2398, "step": 5898 }, { "epoch": 0.5172937770354425, "grad_norm": 0.059814453125, "learning_rate": 0.001773153345091802, "loss": 1.163, "step": 5899 }, { "epoch": 0.5173814688098171, "grad_norm": 0.050537109375, "learning_rate": 0.0017727473541028326, "loss": 1.1586, "step": 5900 }, { "epoch": 0.5174691605841917, "grad_norm": 0.052978515625, "learning_rate": 0.0017723413519196212, "loss": 1.3056, "step": 5901 }, { "epoch": 0.5175568523585662, "grad_norm": 0.049072265625, "learning_rate": 0.0017719353385791943, "loss": 1.1702, "step": 5902 }, { "epoch": 0.5176445441329407, "grad_norm": 0.055419921875, "learning_rate": 0.001771529314118579, "loss": 1.1576, "step": 5903 }, { "epoch": 0.5177322359073153, "grad_norm": 0.06103515625, "learning_rate": 0.0017711232785748042, "loss": 1.2094, "step": 5904 }, { "epoch": 0.5178199276816898, "grad_norm": 0.060302734375, "learning_rate": 0.0017707172319848992, "loss": 1.1956, "step": 5905 }, { "epoch": 0.5179076194560643, "grad_norm": 0.0927734375, "learning_rate": 0.0017703111743858939, "loss": 1.1987, "step": 5906 }, { "epoch": 0.517995311230439, "grad_norm": 0.0615234375, "learning_rate": 0.0017699051058148207, "loss": 1.225, "step": 5907 }, { "epoch": 0.5180830030048135, "grad_norm": 0.095703125, "learning_rate": 0.0017694990263087103, "loss": 1.2063, "step": 5908 }, { "epoch": 0.518170694779188, "grad_norm": 0.06884765625, "learning_rate": 0.001769092935904598, "loss": 1.2529, "step": 5909 }, { "epoch": 0.5182583865535626, "grad_norm": 0.07666015625, "learning_rate": 0.0017686868346395177, "loss": 1.2071, "step": 5910 }, { "epoch": 0.5183460783279371, "grad_norm": 0.08984375, "learning_rate": 0.0017682807225505037, "loss": 1.2922, "step": 5911 }, { "epoch": 0.5184337701023116, "grad_norm": 0.0537109375, "learning_rate": 0.0017678745996745936, "loss": 1.1418, "step": 5912 }, { "epoch": 0.5185214618766861, "grad_norm": 0.06591796875, "learning_rate": 0.001767468466048824, "loss": 1.1313, "step": 5913 }, { "epoch": 0.5186091536510608, "grad_norm": 0.1279296875, "learning_rate": 0.0017670623217102338, "loss": 1.1904, "step": 5914 }, { "epoch": 0.5186968454254353, "grad_norm": 0.05712890625, "learning_rate": 0.0017666561666958619, "loss": 1.1321, "step": 5915 }, { "epoch": 0.5187845371998098, "grad_norm": 0.1337890625, "learning_rate": 0.0017662500010427488, "loss": 1.215, "step": 5916 }, { "epoch": 0.5188722289741844, "grad_norm": 0.0859375, "learning_rate": 0.0017658438247879354, "loss": 1.222, "step": 5917 }, { "epoch": 0.5189599207485589, "grad_norm": 0.1484375, "learning_rate": 0.001765437637968465, "loss": 1.1958, "step": 5918 }, { "epoch": 0.5190476125229334, "grad_norm": 0.09228515625, "learning_rate": 0.0017650314406213795, "loss": 1.2208, "step": 5919 }, { "epoch": 0.519135304297308, "grad_norm": 0.08447265625, "learning_rate": 0.0017646252327837236, "loss": 1.2243, "step": 5920 }, { "epoch": 0.5192229960716825, "grad_norm": 0.1025390625, "learning_rate": 0.0017642190144925422, "loss": 1.2359, "step": 5921 }, { "epoch": 0.519310687846057, "grad_norm": 0.0751953125, "learning_rate": 0.001763812785784882, "loss": 1.1691, "step": 5922 }, { "epoch": 0.5193983796204317, "grad_norm": 0.05126953125, "learning_rate": 0.001763406546697789, "loss": 1.2049, "step": 5923 }, { "epoch": 0.5194860713948062, "grad_norm": 0.08056640625, "learning_rate": 0.001763000297268312, "loss": 1.2328, "step": 5924 }, { "epoch": 0.5195737631691807, "grad_norm": 0.07568359375, "learning_rate": 0.0017625940375334997, "loss": 1.174, "step": 5925 }, { "epoch": 0.5196614549435553, "grad_norm": 0.07763671875, "learning_rate": 0.0017621877675304016, "loss": 1.1733, "step": 5926 }, { "epoch": 0.5197491467179298, "grad_norm": 0.0703125, "learning_rate": 0.001761781487296069, "loss": 1.2273, "step": 5927 }, { "epoch": 0.5198368384923043, "grad_norm": 0.055908203125, "learning_rate": 0.001761375196867553, "loss": 1.2447, "step": 5928 }, { "epoch": 0.519924530266679, "grad_norm": 0.078125, "learning_rate": 0.0017609688962819069, "loss": 1.1925, "step": 5929 }, { "epoch": 0.5200122220410535, "grad_norm": 0.05712890625, "learning_rate": 0.0017605625855761833, "loss": 1.1648, "step": 5930 }, { "epoch": 0.520099913815428, "grad_norm": 0.053955078125, "learning_rate": 0.0017601562647874378, "loss": 1.1648, "step": 5931 }, { "epoch": 0.5201876055898026, "grad_norm": 0.0869140625, "learning_rate": 0.0017597499339527252, "loss": 1.1967, "step": 5932 }, { "epoch": 0.5202752973641771, "grad_norm": 0.06640625, "learning_rate": 0.001759343593109102, "loss": 1.195, "step": 5933 }, { "epoch": 0.5203629891385516, "grad_norm": 0.05908203125, "learning_rate": 0.0017589372422936252, "loss": 1.1863, "step": 5934 }, { "epoch": 0.5204506809129262, "grad_norm": 0.1328125, "learning_rate": 0.0017585308815433536, "loss": 1.2006, "step": 5935 }, { "epoch": 0.5205383726873007, "grad_norm": 0.0576171875, "learning_rate": 0.0017581245108953454, "loss": 1.201, "step": 5936 }, { "epoch": 0.5206260644616753, "grad_norm": 0.09765625, "learning_rate": 0.001757718130386661, "loss": 1.1938, "step": 5937 }, { "epoch": 0.5207137562360498, "grad_norm": 0.0693359375, "learning_rate": 0.001757311740054361, "loss": 1.2201, "step": 5938 }, { "epoch": 0.5208014480104244, "grad_norm": 0.0595703125, "learning_rate": 0.0017569053399355075, "loss": 1.2868, "step": 5939 }, { "epoch": 0.5208891397847989, "grad_norm": 0.055908203125, "learning_rate": 0.001756498930067163, "loss": 1.1847, "step": 5940 }, { "epoch": 0.5209768315591734, "grad_norm": 0.068359375, "learning_rate": 0.001756092510486391, "loss": 1.178, "step": 5941 }, { "epoch": 0.521064523333548, "grad_norm": 0.0595703125, "learning_rate": 0.0017556860812302558, "loss": 1.1574, "step": 5942 }, { "epoch": 0.5211522151079225, "grad_norm": 0.05859375, "learning_rate": 0.0017552796423358228, "loss": 1.2565, "step": 5943 }, { "epoch": 0.521239906882297, "grad_norm": 0.0693359375, "learning_rate": 0.0017548731938401587, "loss": 1.1587, "step": 5944 }, { "epoch": 0.5213275986566717, "grad_norm": 0.078125, "learning_rate": 0.0017544667357803296, "loss": 1.2586, "step": 5945 }, { "epoch": 0.5214152904310462, "grad_norm": 0.07958984375, "learning_rate": 0.0017540602681934037, "loss": 1.1857, "step": 5946 }, { "epoch": 0.5215029822054207, "grad_norm": 0.09814453125, "learning_rate": 0.0017536537911164505, "loss": 1.2428, "step": 5947 }, { "epoch": 0.5215906739797953, "grad_norm": 0.061279296875, "learning_rate": 0.0017532473045865386, "loss": 1.2164, "step": 5948 }, { "epoch": 0.5216783657541698, "grad_norm": 0.0908203125, "learning_rate": 0.0017528408086407395, "loss": 1.2195, "step": 5949 }, { "epoch": 0.5217660575285443, "grad_norm": 0.0517578125, "learning_rate": 0.001752434303316124, "loss": 1.2237, "step": 5950 }, { "epoch": 0.521853749302919, "grad_norm": 0.05712890625, "learning_rate": 0.0017520277886497642, "loss": 1.1773, "step": 5951 }, { "epoch": 0.5219414410772935, "grad_norm": 0.0966796875, "learning_rate": 0.0017516212646787339, "loss": 1.2018, "step": 5952 }, { "epoch": 0.522029132851668, "grad_norm": 0.09326171875, "learning_rate": 0.001751214731440106, "loss": 1.2165, "step": 5953 }, { "epoch": 0.5221168246260426, "grad_norm": 0.10986328125, "learning_rate": 0.0017508081889709565, "loss": 1.2278, "step": 5954 }, { "epoch": 0.5222045164004171, "grad_norm": 0.0732421875, "learning_rate": 0.00175040163730836, "loss": 1.2454, "step": 5955 }, { "epoch": 0.5222922081747916, "grad_norm": 0.1328125, "learning_rate": 0.0017499950764893937, "loss": 1.2108, "step": 5956 }, { "epoch": 0.5223798999491662, "grad_norm": 0.050048828125, "learning_rate": 0.0017495885065511342, "loss": 1.2272, "step": 5957 }, { "epoch": 0.5224675917235407, "grad_norm": 0.09375, "learning_rate": 0.0017491819275306602, "loss": 1.2018, "step": 5958 }, { "epoch": 0.5225552834979152, "grad_norm": 0.1162109375, "learning_rate": 0.0017487753394650498, "loss": 1.2055, "step": 5959 }, { "epoch": 0.5226429752722898, "grad_norm": 0.09228515625, "learning_rate": 0.0017483687423913842, "loss": 1.2245, "step": 5960 }, { "epoch": 0.5227306670466644, "grad_norm": 0.11767578125, "learning_rate": 0.0017479621363467432, "loss": 1.2052, "step": 5961 }, { "epoch": 0.5228183588210389, "grad_norm": 0.0673828125, "learning_rate": 0.0017475555213682084, "loss": 1.2317, "step": 5962 }, { "epoch": 0.5229060505954134, "grad_norm": 0.10693359375, "learning_rate": 0.0017471488974928613, "loss": 1.1661, "step": 5963 }, { "epoch": 0.522993742369788, "grad_norm": 0.13671875, "learning_rate": 0.0017467422647577861, "loss": 1.1888, "step": 5964 }, { "epoch": 0.5230814341441625, "grad_norm": 0.07421875, "learning_rate": 0.0017463356232000657, "loss": 1.2355, "step": 5965 }, { "epoch": 0.523169125918537, "grad_norm": 0.11376953125, "learning_rate": 0.0017459289728567853, "loss": 1.2028, "step": 5966 }, { "epoch": 0.5232568176929117, "grad_norm": 0.06884765625, "learning_rate": 0.00174552231376503, "loss": 1.1421, "step": 5967 }, { "epoch": 0.5233445094672862, "grad_norm": 0.05419921875, "learning_rate": 0.0017451156459618864, "loss": 1.2064, "step": 5968 }, { "epoch": 0.5234322012416607, "grad_norm": 0.087890625, "learning_rate": 0.001744708969484442, "loss": 1.1968, "step": 5969 }, { "epoch": 0.5235198930160353, "grad_norm": 0.09716796875, "learning_rate": 0.001744302284369783, "loss": 1.2126, "step": 5970 }, { "epoch": 0.5236075847904098, "grad_norm": 0.06396484375, "learning_rate": 0.001743895590655, "loss": 1.2154, "step": 5971 }, { "epoch": 0.5236952765647843, "grad_norm": 0.08642578125, "learning_rate": 0.001743488888377181, "loss": 1.2485, "step": 5972 }, { "epoch": 0.5237829683391589, "grad_norm": 0.0634765625, "learning_rate": 0.001743082177573417, "loss": 1.2214, "step": 5973 }, { "epoch": 0.5238706601135334, "grad_norm": 0.05419921875, "learning_rate": 0.0017426754582807987, "loss": 1.1443, "step": 5974 }, { "epoch": 0.523958351887908, "grad_norm": 0.055419921875, "learning_rate": 0.0017422687305364175, "loss": 1.2245, "step": 5975 }, { "epoch": 0.5240460436622826, "grad_norm": 0.060791015625, "learning_rate": 0.0017418619943773664, "loss": 1.2011, "step": 5976 }, { "epoch": 0.5241337354366571, "grad_norm": 0.09619140625, "learning_rate": 0.0017414552498407389, "loss": 1.2399, "step": 5977 }, { "epoch": 0.5242214272110316, "grad_norm": 0.06982421875, "learning_rate": 0.0017410484969636287, "loss": 1.2207, "step": 5978 }, { "epoch": 0.5243091189854062, "grad_norm": 0.06591796875, "learning_rate": 0.0017406417357831304, "loss": 1.1186, "step": 5979 }, { "epoch": 0.5243968107597807, "grad_norm": 0.1015625, "learning_rate": 0.0017402349663363395, "loss": 1.1998, "step": 5980 }, { "epoch": 0.5244845025341552, "grad_norm": 0.057373046875, "learning_rate": 0.0017398281886603529, "loss": 1.1944, "step": 5981 }, { "epoch": 0.5245721943085299, "grad_norm": 0.061279296875, "learning_rate": 0.0017394214027922675, "loss": 1.2566, "step": 5982 }, { "epoch": 0.5246598860829044, "grad_norm": 0.11767578125, "learning_rate": 0.0017390146087691808, "loss": 1.2032, "step": 5983 }, { "epoch": 0.5247475778572789, "grad_norm": 0.060302734375, "learning_rate": 0.0017386078066281916, "loss": 1.2575, "step": 5984 }, { "epoch": 0.5248352696316534, "grad_norm": 0.1279296875, "learning_rate": 0.001738200996406399, "loss": 1.2187, "step": 5985 }, { "epoch": 0.524922961406028, "grad_norm": 0.07421875, "learning_rate": 0.001737794178140904, "loss": 1.1844, "step": 5986 }, { "epoch": 0.5250106531804025, "grad_norm": 0.051513671875, "learning_rate": 0.0017373873518688058, "loss": 1.2208, "step": 5987 }, { "epoch": 0.525098344954777, "grad_norm": 0.10595703125, "learning_rate": 0.0017369805176272069, "loss": 1.2599, "step": 5988 }, { "epoch": 0.5251860367291517, "grad_norm": 0.12353515625, "learning_rate": 0.0017365736754532096, "loss": 1.2202, "step": 5989 }, { "epoch": 0.5252737285035262, "grad_norm": 0.06591796875, "learning_rate": 0.001736166825383916, "loss": 1.1879, "step": 5990 }, { "epoch": 0.5253614202779007, "grad_norm": 0.15234375, "learning_rate": 0.0017357599674564309, "loss": 1.1526, "step": 5991 }, { "epoch": 0.5254491120522753, "grad_norm": 0.09912109375, "learning_rate": 0.001735353101707858, "loss": 1.1673, "step": 5992 }, { "epoch": 0.5255368038266498, "grad_norm": 0.08349609375, "learning_rate": 0.0017349462281753022, "loss": 1.2269, "step": 5993 }, { "epoch": 0.5256244956010243, "grad_norm": 0.11669921875, "learning_rate": 0.0017345393468958703, "loss": 1.2104, "step": 5994 }, { "epoch": 0.5257121873753989, "grad_norm": 0.07763671875, "learning_rate": 0.0017341324579066683, "loss": 1.1731, "step": 5995 }, { "epoch": 0.5257998791497734, "grad_norm": 0.07861328125, "learning_rate": 0.0017337255612448023, "loss": 1.1912, "step": 5996 }, { "epoch": 0.525887570924148, "grad_norm": 0.058349609375, "learning_rate": 0.0017333186569473823, "loss": 1.209, "step": 5997 }, { "epoch": 0.5259752626985226, "grad_norm": 0.05419921875, "learning_rate": 0.0017329117450515154, "loss": 1.1623, "step": 5998 }, { "epoch": 0.5260629544728971, "grad_norm": 0.0537109375, "learning_rate": 0.0017325048255943112, "loss": 1.1972, "step": 5999 }, { "epoch": 0.5261506462472716, "grad_norm": 0.05615234375, "learning_rate": 0.0017320978986128802, "loss": 1.2089, "step": 6000 }, { "epoch": 0.5261506462472716, "eval_loss": 1.2098323106765747, "eval_runtime": 428.9595, "eval_samples_per_second": 33.679, "eval_steps_per_second": 8.42, "step": 6000 }, { "epoch": 0.5262383380216462, "grad_norm": 0.05322265625, "learning_rate": 0.0017316909641443329, "loss": 1.2199, "step": 6001 }, { "epoch": 0.5263260297960207, "grad_norm": 0.0966796875, "learning_rate": 0.0017312840222257803, "loss": 1.1658, "step": 6002 }, { "epoch": 0.5264137215703952, "grad_norm": 0.0576171875, "learning_rate": 0.001730877072894335, "loss": 1.163, "step": 6003 }, { "epoch": 0.5265014133447699, "grad_norm": 0.04833984375, "learning_rate": 0.001730470116187109, "loss": 1.1826, "step": 6004 }, { "epoch": 0.5265891051191444, "grad_norm": 0.11669921875, "learning_rate": 0.0017300631521412165, "loss": 1.2092, "step": 6005 }, { "epoch": 0.5266767968935189, "grad_norm": 0.054443359375, "learning_rate": 0.0017296561807937708, "loss": 1.1963, "step": 6006 }, { "epoch": 0.5267644886678935, "grad_norm": 0.05322265625, "learning_rate": 0.001729249202181887, "loss": 1.2497, "step": 6007 }, { "epoch": 0.526852180442268, "grad_norm": 0.095703125, "learning_rate": 0.0017288422163426814, "loss": 1.184, "step": 6008 }, { "epoch": 0.5269398722166425, "grad_norm": 0.08447265625, "learning_rate": 0.0017284352233132682, "loss": 1.2382, "step": 6009 }, { "epoch": 0.527027563991017, "grad_norm": 0.080078125, "learning_rate": 0.001728028223130766, "loss": 1.2172, "step": 6010 }, { "epoch": 0.5271152557653916, "grad_norm": 0.0947265625, "learning_rate": 0.001727621215832291, "loss": 1.1901, "step": 6011 }, { "epoch": 0.5272029475397662, "grad_norm": 0.054443359375, "learning_rate": 0.001727214201454961, "loss": 1.1645, "step": 6012 }, { "epoch": 0.5272906393141407, "grad_norm": 0.064453125, "learning_rate": 0.0017268071800358956, "loss": 1.1817, "step": 6013 }, { "epoch": 0.5273783310885153, "grad_norm": 0.058349609375, "learning_rate": 0.0017264001516122136, "loss": 1.2552, "step": 6014 }, { "epoch": 0.5274660228628898, "grad_norm": 0.0673828125, "learning_rate": 0.0017259931162210349, "loss": 1.1848, "step": 6015 }, { "epoch": 0.5275537146372643, "grad_norm": 0.052978515625, "learning_rate": 0.0017255860738994802, "loss": 1.1577, "step": 6016 }, { "epoch": 0.5276414064116389, "grad_norm": 0.09228515625, "learning_rate": 0.0017251790246846706, "loss": 1.2307, "step": 6017 }, { "epoch": 0.5277290981860134, "grad_norm": 0.056640625, "learning_rate": 0.001724771968613728, "loss": 1.1781, "step": 6018 }, { "epoch": 0.5278167899603879, "grad_norm": 0.0625, "learning_rate": 0.0017243649057237752, "loss": 1.2543, "step": 6019 }, { "epoch": 0.5279044817347626, "grad_norm": 0.061767578125, "learning_rate": 0.001723957836051935, "loss": 1.2322, "step": 6020 }, { "epoch": 0.5279921735091371, "grad_norm": 0.060546875, "learning_rate": 0.0017235507596353307, "loss": 1.1559, "step": 6021 }, { "epoch": 0.5280798652835116, "grad_norm": 0.0654296875, "learning_rate": 0.0017231436765110868, "loss": 1.2038, "step": 6022 }, { "epoch": 0.5281675570578862, "grad_norm": 0.060546875, "learning_rate": 0.001722736586716329, "loss": 1.2374, "step": 6023 }, { "epoch": 0.5282552488322607, "grad_norm": 0.0927734375, "learning_rate": 0.0017223294902881819, "loss": 1.1432, "step": 6024 }, { "epoch": 0.5283429406066352, "grad_norm": 0.06005859375, "learning_rate": 0.0017219223872637721, "loss": 1.1912, "step": 6025 }, { "epoch": 0.5284306323810098, "grad_norm": 0.08154296875, "learning_rate": 0.0017215152776802265, "loss": 1.2205, "step": 6026 }, { "epoch": 0.5285183241553844, "grad_norm": 0.06884765625, "learning_rate": 0.0017211081615746717, "loss": 1.2176, "step": 6027 }, { "epoch": 0.5286060159297589, "grad_norm": 0.05322265625, "learning_rate": 0.0017207010389842367, "loss": 1.1612, "step": 6028 }, { "epoch": 0.5286937077041335, "grad_norm": 0.049560546875, "learning_rate": 0.0017202939099460492, "loss": 1.2241, "step": 6029 }, { "epoch": 0.528781399478508, "grad_norm": 0.072265625, "learning_rate": 0.0017198867744972386, "loss": 1.2091, "step": 6030 }, { "epoch": 0.5288690912528825, "grad_norm": 0.06982421875, "learning_rate": 0.0017194796326749342, "loss": 1.243, "step": 6031 }, { "epoch": 0.528956783027257, "grad_norm": 0.053955078125, "learning_rate": 0.001719072484516267, "loss": 1.2303, "step": 6032 }, { "epoch": 0.5290444748016316, "grad_norm": 0.09228515625, "learning_rate": 0.0017186653300583677, "loss": 1.1895, "step": 6033 }, { "epoch": 0.5291321665760061, "grad_norm": 0.06884765625, "learning_rate": 0.0017182581693383672, "loss": 1.1881, "step": 6034 }, { "epoch": 0.5292198583503807, "grad_norm": 0.0703125, "learning_rate": 0.001717851002393398, "loss": 1.2227, "step": 6035 }, { "epoch": 0.5293075501247553, "grad_norm": 0.1142578125, "learning_rate": 0.0017174438292605922, "loss": 1.2155, "step": 6036 }, { "epoch": 0.5293952418991298, "grad_norm": 0.06005859375, "learning_rate": 0.0017170366499770842, "loss": 1.3038, "step": 6037 }, { "epoch": 0.5294829336735043, "grad_norm": 0.057861328125, "learning_rate": 0.0017166294645800063, "loss": 1.2013, "step": 6038 }, { "epoch": 0.5295706254478789, "grad_norm": 0.062255859375, "learning_rate": 0.001716222273106493, "loss": 1.1787, "step": 6039 }, { "epoch": 0.5296583172222534, "grad_norm": 0.053955078125, "learning_rate": 0.0017158150755936794, "loss": 1.1541, "step": 6040 }, { "epoch": 0.5297460089966279, "grad_norm": 0.052978515625, "learning_rate": 0.0017154078720787004, "loss": 1.1608, "step": 6041 }, { "epoch": 0.5298337007710026, "grad_norm": 0.051025390625, "learning_rate": 0.0017150006625986933, "loss": 1.1454, "step": 6042 }, { "epoch": 0.5299213925453771, "grad_norm": 0.0654296875, "learning_rate": 0.0017145934471907932, "loss": 1.1794, "step": 6043 }, { "epoch": 0.5300090843197516, "grad_norm": 0.050537109375, "learning_rate": 0.0017141862258921372, "loss": 1.1491, "step": 6044 }, { "epoch": 0.5300967760941262, "grad_norm": 0.052734375, "learning_rate": 0.0017137789987398632, "loss": 1.2121, "step": 6045 }, { "epoch": 0.5301844678685007, "grad_norm": 0.05517578125, "learning_rate": 0.0017133717657711097, "loss": 1.2174, "step": 6046 }, { "epoch": 0.5302721596428752, "grad_norm": 0.056640625, "learning_rate": 0.0017129645270230138, "loss": 1.1736, "step": 6047 }, { "epoch": 0.5303598514172498, "grad_norm": 0.056396484375, "learning_rate": 0.0017125572825327162, "loss": 1.2028, "step": 6048 }, { "epoch": 0.5304475431916243, "grad_norm": 0.07275390625, "learning_rate": 0.0017121500323373558, "loss": 1.1732, "step": 6049 }, { "epoch": 0.5305352349659989, "grad_norm": 0.0673828125, "learning_rate": 0.001711742776474073, "loss": 1.126, "step": 6050 }, { "epoch": 0.5306229267403735, "grad_norm": 0.058837890625, "learning_rate": 0.0017113355149800077, "loss": 1.2023, "step": 6051 }, { "epoch": 0.530710618514748, "grad_norm": 0.10107421875, "learning_rate": 0.0017109282478923024, "loss": 1.2058, "step": 6052 }, { "epoch": 0.5307983102891225, "grad_norm": 0.064453125, "learning_rate": 0.0017105209752480979, "loss": 1.228, "step": 6053 }, { "epoch": 0.5308860020634971, "grad_norm": 0.0693359375, "learning_rate": 0.001710113697084537, "loss": 1.1671, "step": 6054 }, { "epoch": 0.5309736938378716, "grad_norm": 0.053955078125, "learning_rate": 0.001709706413438762, "loss": 1.2221, "step": 6055 }, { "epoch": 0.5310613856122461, "grad_norm": 0.05224609375, "learning_rate": 0.0017092991243479161, "loss": 1.2083, "step": 6056 }, { "epoch": 0.5311490773866206, "grad_norm": 0.060791015625, "learning_rate": 0.0017088918298491435, "loss": 1.1553, "step": 6057 }, { "epoch": 0.5312367691609953, "grad_norm": 0.078125, "learning_rate": 0.0017084845299795875, "loss": 1.2471, "step": 6058 }, { "epoch": 0.5313244609353698, "grad_norm": 0.05224609375, "learning_rate": 0.0017080772247763938, "loss": 1.2414, "step": 6059 }, { "epoch": 0.5314121527097443, "grad_norm": 0.10791015625, "learning_rate": 0.0017076699142767068, "loss": 1.1817, "step": 6060 }, { "epoch": 0.5314998444841189, "grad_norm": 0.04833984375, "learning_rate": 0.0017072625985176727, "loss": 1.074, "step": 6061 }, { "epoch": 0.5315875362584934, "grad_norm": 0.080078125, "learning_rate": 0.0017068552775364374, "loss": 1.259, "step": 6062 }, { "epoch": 0.5316752280328679, "grad_norm": 0.07958984375, "learning_rate": 0.0017064479513701474, "loss": 1.156, "step": 6063 }, { "epoch": 0.5317629198072426, "grad_norm": 0.055419921875, "learning_rate": 0.0017060406200559502, "loss": 1.1567, "step": 6064 }, { "epoch": 0.5318506115816171, "grad_norm": 0.09375, "learning_rate": 0.001705633283630993, "loss": 1.2369, "step": 6065 }, { "epoch": 0.5319383033559916, "grad_norm": 0.078125, "learning_rate": 0.001705225942132424, "loss": 1.2076, "step": 6066 }, { "epoch": 0.5320259951303662, "grad_norm": 0.115234375, "learning_rate": 0.0017048185955973914, "loss": 1.2705, "step": 6067 }, { "epoch": 0.5321136869047407, "grad_norm": 0.09423828125, "learning_rate": 0.0017044112440630448, "loss": 1.1289, "step": 6068 }, { "epoch": 0.5322013786791152, "grad_norm": 0.1044921875, "learning_rate": 0.0017040038875665328, "loss": 1.1769, "step": 6069 }, { "epoch": 0.5322890704534898, "grad_norm": 0.0966796875, "learning_rate": 0.001703596526145006, "loss": 1.1961, "step": 6070 }, { "epoch": 0.5323767622278643, "grad_norm": 0.1015625, "learning_rate": 0.0017031891598356145, "loss": 1.2114, "step": 6071 }, { "epoch": 0.5324644540022389, "grad_norm": 0.09326171875, "learning_rate": 0.0017027817886755086, "loss": 1.1566, "step": 6072 }, { "epoch": 0.5325521457766135, "grad_norm": 0.050048828125, "learning_rate": 0.0017023744127018397, "loss": 1.1705, "step": 6073 }, { "epoch": 0.532639837550988, "grad_norm": 0.064453125, "learning_rate": 0.0017019670319517598, "loss": 1.2078, "step": 6074 }, { "epoch": 0.5327275293253625, "grad_norm": 0.07177734375, "learning_rate": 0.0017015596464624208, "loss": 1.244, "step": 6075 }, { "epoch": 0.5328152210997371, "grad_norm": 0.06982421875, "learning_rate": 0.001701152256270975, "loss": 1.2225, "step": 6076 }, { "epoch": 0.5329029128741116, "grad_norm": 0.0751953125, "learning_rate": 0.0017007448614145754, "loss": 1.1668, "step": 6077 }, { "epoch": 0.5329906046484861, "grad_norm": 0.0888671875, "learning_rate": 0.0017003374619303754, "loss": 1.1511, "step": 6078 }, { "epoch": 0.5330782964228606, "grad_norm": 0.0556640625, "learning_rate": 0.0016999300578555295, "loss": 1.2157, "step": 6079 }, { "epoch": 0.5331659881972353, "grad_norm": 0.083984375, "learning_rate": 0.0016995226492271904, "loss": 1.215, "step": 6080 }, { "epoch": 0.5332536799716098, "grad_norm": 0.0888671875, "learning_rate": 0.0016991152360825137, "loss": 1.2411, "step": 6081 }, { "epoch": 0.5333413717459843, "grad_norm": 0.11181640625, "learning_rate": 0.0016987078184586546, "loss": 1.2639, "step": 6082 }, { "epoch": 0.5334290635203589, "grad_norm": 0.0810546875, "learning_rate": 0.0016983003963927678, "loss": 1.1756, "step": 6083 }, { "epoch": 0.5335167552947334, "grad_norm": 0.07421875, "learning_rate": 0.0016978929699220098, "loss": 1.1801, "step": 6084 }, { "epoch": 0.5336044470691079, "grad_norm": 0.1484375, "learning_rate": 0.0016974855390835364, "loss": 1.1388, "step": 6085 }, { "epoch": 0.5336921388434825, "grad_norm": 0.057373046875, "learning_rate": 0.0016970781039145045, "loss": 1.2668, "step": 6086 }, { "epoch": 0.533779830617857, "grad_norm": 0.061767578125, "learning_rate": 0.0016966706644520708, "loss": 1.1852, "step": 6087 }, { "epoch": 0.5338675223922316, "grad_norm": 0.078125, "learning_rate": 0.0016962632207333937, "loss": 1.2049, "step": 6088 }, { "epoch": 0.5339552141666062, "grad_norm": 0.050537109375, "learning_rate": 0.0016958557727956296, "loss": 1.1557, "step": 6089 }, { "epoch": 0.5340429059409807, "grad_norm": 0.0625, "learning_rate": 0.001695448320675938, "loss": 1.2429, "step": 6090 }, { "epoch": 0.5341305977153552, "grad_norm": 0.050048828125, "learning_rate": 0.0016950408644114765, "loss": 1.2352, "step": 6091 }, { "epoch": 0.5342182894897298, "grad_norm": 0.0732421875, "learning_rate": 0.0016946334040394044, "loss": 1.167, "step": 6092 }, { "epoch": 0.5343059812641043, "grad_norm": 0.047607421875, "learning_rate": 0.0016942259395968811, "loss": 1.2015, "step": 6093 }, { "epoch": 0.5343936730384788, "grad_norm": 0.08349609375, "learning_rate": 0.001693818471121066, "loss": 1.2373, "step": 6094 }, { "epoch": 0.5344813648128535, "grad_norm": 0.0556640625, "learning_rate": 0.0016934109986491198, "loss": 1.1605, "step": 6095 }, { "epoch": 0.534569056587228, "grad_norm": 0.058349609375, "learning_rate": 0.001693003522218203, "loss": 1.1489, "step": 6096 }, { "epoch": 0.5346567483616025, "grad_norm": 0.07080078125, "learning_rate": 0.0016925960418654753, "loss": 1.2035, "step": 6097 }, { "epoch": 0.5347444401359771, "grad_norm": 0.052734375, "learning_rate": 0.0016921885576280985, "loss": 1.2205, "step": 6098 }, { "epoch": 0.5348321319103516, "grad_norm": 0.05908203125, "learning_rate": 0.0016917810695432347, "loss": 1.1977, "step": 6099 }, { "epoch": 0.5349198236847261, "grad_norm": 0.07421875, "learning_rate": 0.001691373577648045, "loss": 1.1692, "step": 6100 }, { "epoch": 0.5350075154591007, "grad_norm": 0.06298828125, "learning_rate": 0.0016909660819796916, "loss": 1.2141, "step": 6101 }, { "epoch": 0.5350952072334753, "grad_norm": 0.07080078125, "learning_rate": 0.0016905585825753374, "loss": 1.1856, "step": 6102 }, { "epoch": 0.5351828990078498, "grad_norm": 0.06494140625, "learning_rate": 0.0016901510794721454, "loss": 1.2701, "step": 6103 }, { "epoch": 0.5352705907822243, "grad_norm": 0.04833984375, "learning_rate": 0.0016897435727072781, "loss": 1.197, "step": 6104 }, { "epoch": 0.5353582825565989, "grad_norm": 0.045654296875, "learning_rate": 0.0016893360623179005, "loss": 1.1793, "step": 6105 }, { "epoch": 0.5354459743309734, "grad_norm": 0.0693359375, "learning_rate": 0.0016889285483411747, "loss": 1.2038, "step": 6106 }, { "epoch": 0.5355336661053479, "grad_norm": 0.051025390625, "learning_rate": 0.0016885210308142658, "loss": 1.1948, "step": 6107 }, { "epoch": 0.5356213578797225, "grad_norm": 0.05615234375, "learning_rate": 0.0016881135097743388, "loss": 1.1958, "step": 6108 }, { "epoch": 0.535709049654097, "grad_norm": 0.05859375, "learning_rate": 0.0016877059852585577, "loss": 1.1904, "step": 6109 }, { "epoch": 0.5357967414284716, "grad_norm": 0.072265625, "learning_rate": 0.0016872984573040883, "loss": 1.2038, "step": 6110 }, { "epoch": 0.5358844332028462, "grad_norm": 0.06494140625, "learning_rate": 0.0016868909259480955, "loss": 1.2296, "step": 6111 }, { "epoch": 0.5359721249772207, "grad_norm": 0.051513671875, "learning_rate": 0.001686483391227746, "loss": 1.253, "step": 6112 }, { "epoch": 0.5360598167515952, "grad_norm": 0.06103515625, "learning_rate": 0.0016860758531802055, "loss": 1.1813, "step": 6113 }, { "epoch": 0.5361475085259698, "grad_norm": 0.0859375, "learning_rate": 0.0016856683118426398, "loss": 1.1752, "step": 6114 }, { "epoch": 0.5362352003003443, "grad_norm": 0.0654296875, "learning_rate": 0.0016852607672522165, "loss": 1.2154, "step": 6115 }, { "epoch": 0.5363228920747188, "grad_norm": 0.08740234375, "learning_rate": 0.0016848532194461017, "loss": 1.1873, "step": 6116 }, { "epoch": 0.5364105838490935, "grad_norm": 0.0703125, "learning_rate": 0.0016844456684614638, "loss": 1.1859, "step": 6117 }, { "epoch": 0.536498275623468, "grad_norm": 0.052490234375, "learning_rate": 0.0016840381143354696, "loss": 1.2095, "step": 6118 }, { "epoch": 0.5365859673978425, "grad_norm": 0.0927734375, "learning_rate": 0.0016836305571052879, "loss": 1.1509, "step": 6119 }, { "epoch": 0.5366736591722171, "grad_norm": 0.07275390625, "learning_rate": 0.0016832229968080854, "loss": 1.1686, "step": 6120 }, { "epoch": 0.5367613509465916, "grad_norm": 0.060302734375, "learning_rate": 0.0016828154334810317, "loss": 1.2106, "step": 6121 }, { "epoch": 0.5368490427209661, "grad_norm": 0.06640625, "learning_rate": 0.0016824078671612954, "loss": 1.1513, "step": 6122 }, { "epoch": 0.5369367344953407, "grad_norm": 0.054931640625, "learning_rate": 0.0016820002978860445, "loss": 1.2239, "step": 6123 }, { "epoch": 0.5370244262697152, "grad_norm": 0.061767578125, "learning_rate": 0.0016815927256924501, "loss": 1.1395, "step": 6124 }, { "epoch": 0.5371121180440898, "grad_norm": 0.054443359375, "learning_rate": 0.0016811851506176795, "loss": 1.1362, "step": 6125 }, { "epoch": 0.5371998098184644, "grad_norm": 0.054443359375, "learning_rate": 0.0016807775726989046, "loss": 1.1433, "step": 6126 }, { "epoch": 0.5372875015928389, "grad_norm": 0.062255859375, "learning_rate": 0.001680369991973294, "loss": 1.1391, "step": 6127 }, { "epoch": 0.5373751933672134, "grad_norm": 0.08349609375, "learning_rate": 0.001679962408478019, "loss": 1.2431, "step": 6128 }, { "epoch": 0.5374628851415879, "grad_norm": 0.078125, "learning_rate": 0.001679554822250249, "loss": 1.1762, "step": 6129 }, { "epoch": 0.5375505769159625, "grad_norm": 0.07861328125, "learning_rate": 0.0016791472333271564, "loss": 1.2069, "step": 6130 }, { "epoch": 0.537638268690337, "grad_norm": 0.0771484375, "learning_rate": 0.0016787396417459108, "loss": 1.1879, "step": 6131 }, { "epoch": 0.5377259604647115, "grad_norm": 0.08740234375, "learning_rate": 0.001678332047543684, "loss": 1.1571, "step": 6132 }, { "epoch": 0.5378136522390862, "grad_norm": 0.09814453125, "learning_rate": 0.0016779244507576477, "loss": 1.1535, "step": 6133 }, { "epoch": 0.5379013440134607, "grad_norm": 0.09765625, "learning_rate": 0.0016775168514249735, "loss": 1.1928, "step": 6134 }, { "epoch": 0.5379890357878352, "grad_norm": 0.0634765625, "learning_rate": 0.0016771092495828338, "loss": 1.179, "step": 6135 }, { "epoch": 0.5380767275622098, "grad_norm": 0.11474609375, "learning_rate": 0.0016767016452684, "loss": 1.2353, "step": 6136 }, { "epoch": 0.5381644193365843, "grad_norm": 0.0673828125, "learning_rate": 0.001676294038518846, "loss": 1.2419, "step": 6137 }, { "epoch": 0.5382521111109588, "grad_norm": 0.080078125, "learning_rate": 0.0016758864293713429, "loss": 1.2448, "step": 6138 }, { "epoch": 0.5383398028853335, "grad_norm": 0.05224609375, "learning_rate": 0.0016754788178630646, "loss": 1.1988, "step": 6139 }, { "epoch": 0.538427494659708, "grad_norm": 0.07470703125, "learning_rate": 0.0016750712040311832, "loss": 1.1571, "step": 6140 }, { "epoch": 0.5385151864340825, "grad_norm": 0.08251953125, "learning_rate": 0.0016746635879128734, "loss": 1.2117, "step": 6141 }, { "epoch": 0.5386028782084571, "grad_norm": 0.059326171875, "learning_rate": 0.0016742559695453075, "loss": 1.2107, "step": 6142 }, { "epoch": 0.5386905699828316, "grad_norm": 0.0986328125, "learning_rate": 0.0016738483489656603, "loss": 1.1684, "step": 6143 }, { "epoch": 0.5387782617572061, "grad_norm": 0.06494140625, "learning_rate": 0.0016734407262111047, "loss": 1.1604, "step": 6144 }, { "epoch": 0.5388659535315807, "grad_norm": 0.0634765625, "learning_rate": 0.0016730331013188162, "loss": 1.1765, "step": 6145 }, { "epoch": 0.5389536453059552, "grad_norm": 0.0791015625, "learning_rate": 0.0016726254743259677, "loss": 1.1642, "step": 6146 }, { "epoch": 0.5390413370803298, "grad_norm": 0.062255859375, "learning_rate": 0.0016722178452697343, "loss": 1.1586, "step": 6147 }, { "epoch": 0.5391290288547044, "grad_norm": 0.06396484375, "learning_rate": 0.0016718102141872913, "loss": 1.1863, "step": 6148 }, { "epoch": 0.5392167206290789, "grad_norm": 0.1025390625, "learning_rate": 0.0016714025811158126, "loss": 1.1197, "step": 6149 }, { "epoch": 0.5393044124034534, "grad_norm": 0.07861328125, "learning_rate": 0.001670994946092474, "loss": 1.2212, "step": 6150 }, { "epoch": 0.5393921041778279, "grad_norm": 0.072265625, "learning_rate": 0.0016705873091544502, "loss": 1.1073, "step": 6151 }, { "epoch": 0.5394797959522025, "grad_norm": 0.08837890625, "learning_rate": 0.0016701796703389173, "loss": 1.1708, "step": 6152 }, { "epoch": 0.539567487726577, "grad_norm": 0.06103515625, "learning_rate": 0.00166977202968305, "loss": 1.1832, "step": 6153 }, { "epoch": 0.5396551795009515, "grad_norm": 0.072265625, "learning_rate": 0.0016693643872240252, "loss": 1.2133, "step": 6154 }, { "epoch": 0.5397428712753262, "grad_norm": 0.078125, "learning_rate": 0.0016689567429990185, "loss": 1.2414, "step": 6155 }, { "epoch": 0.5398305630497007, "grad_norm": 0.05322265625, "learning_rate": 0.0016685490970452053, "loss": 1.2066, "step": 6156 }, { "epoch": 0.5399182548240752, "grad_norm": 0.05322265625, "learning_rate": 0.0016681414493997624, "loss": 1.1924, "step": 6157 }, { "epoch": 0.5400059465984498, "grad_norm": 0.08984375, "learning_rate": 0.001667733800099866, "loss": 1.1581, "step": 6158 }, { "epoch": 0.5400936383728243, "grad_norm": 0.05322265625, "learning_rate": 0.0016673261491826931, "loss": 1.1925, "step": 6159 }, { "epoch": 0.5401813301471988, "grad_norm": 0.06396484375, "learning_rate": 0.0016669184966854197, "loss": 1.2244, "step": 6160 }, { "epoch": 0.5402690219215734, "grad_norm": 0.09619140625, "learning_rate": 0.0016665108426452237, "loss": 1.197, "step": 6161 }, { "epoch": 0.540356713695948, "grad_norm": 0.058349609375, "learning_rate": 0.0016661031870992809, "loss": 1.2008, "step": 6162 }, { "epoch": 0.5404444054703225, "grad_norm": 0.146484375, "learning_rate": 0.0016656955300847695, "loss": 1.2287, "step": 6163 }, { "epoch": 0.5405320972446971, "grad_norm": 0.06689453125, "learning_rate": 0.001665287871638866, "loss": 1.1894, "step": 6164 }, { "epoch": 0.5406197890190716, "grad_norm": 0.0986328125, "learning_rate": 0.001664880211798748, "loss": 1.212, "step": 6165 }, { "epoch": 0.5407074807934461, "grad_norm": 0.09326171875, "learning_rate": 0.001664472550601593, "loss": 1.2552, "step": 6166 }, { "epoch": 0.5407951725678207, "grad_norm": 0.057861328125, "learning_rate": 0.0016640648880845792, "loss": 1.2336, "step": 6167 }, { "epoch": 0.5408828643421952, "grad_norm": 0.05419921875, "learning_rate": 0.0016636572242848837, "loss": 1.1402, "step": 6168 }, { "epoch": 0.5409705561165697, "grad_norm": 0.050537109375, "learning_rate": 0.0016632495592396842, "loss": 1.1533, "step": 6169 }, { "epoch": 0.5410582478909444, "grad_norm": 0.058837890625, "learning_rate": 0.0016628418929861596, "loss": 1.2728, "step": 6170 }, { "epoch": 0.5411459396653189, "grad_norm": 0.060791015625, "learning_rate": 0.001662434225561487, "loss": 1.1924, "step": 6171 }, { "epoch": 0.5412336314396934, "grad_norm": 0.05908203125, "learning_rate": 0.0016620265570028462, "loss": 1.213, "step": 6172 }, { "epoch": 0.541321323214068, "grad_norm": 0.06494140625, "learning_rate": 0.0016616188873474133, "loss": 1.1503, "step": 6173 }, { "epoch": 0.5414090149884425, "grad_norm": 0.06787109375, "learning_rate": 0.0016612112166323688, "loss": 1.1573, "step": 6174 }, { "epoch": 0.541496706762817, "grad_norm": 0.0634765625, "learning_rate": 0.0016608035448948896, "loss": 1.2252, "step": 6175 }, { "epoch": 0.5415843985371915, "grad_norm": 0.07080078125, "learning_rate": 0.001660395872172155, "loss": 1.1762, "step": 6176 }, { "epoch": 0.5416720903115662, "grad_norm": 0.05517578125, "learning_rate": 0.001659988198501344, "loss": 1.1513, "step": 6177 }, { "epoch": 0.5417597820859407, "grad_norm": 0.0986328125, "learning_rate": 0.0016595805239196346, "loss": 1.2014, "step": 6178 }, { "epoch": 0.5418474738603152, "grad_norm": 0.06884765625, "learning_rate": 0.0016591728484642069, "loss": 1.2542, "step": 6179 }, { "epoch": 0.5419351656346898, "grad_norm": 0.10302734375, "learning_rate": 0.0016587651721722385, "loss": 1.1613, "step": 6180 }, { "epoch": 0.5420228574090643, "grad_norm": 0.09619140625, "learning_rate": 0.0016583574950809095, "loss": 1.1805, "step": 6181 }, { "epoch": 0.5421105491834388, "grad_norm": 0.054443359375, "learning_rate": 0.0016579498172273983, "loss": 1.1878, "step": 6182 }, { "epoch": 0.5421982409578134, "grad_norm": 0.1337890625, "learning_rate": 0.0016575421386488842, "loss": 1.1471, "step": 6183 }, { "epoch": 0.542285932732188, "grad_norm": 0.10888671875, "learning_rate": 0.0016571344593825465, "loss": 1.2014, "step": 6184 }, { "epoch": 0.5423736245065625, "grad_norm": 0.09228515625, "learning_rate": 0.0016567267794655649, "loss": 1.2028, "step": 6185 }, { "epoch": 0.5424613162809371, "grad_norm": 0.1435546875, "learning_rate": 0.0016563190989351175, "loss": 1.2114, "step": 6186 }, { "epoch": 0.5425490080553116, "grad_norm": 0.06982421875, "learning_rate": 0.0016559114178283853, "loss": 1.2216, "step": 6187 }, { "epoch": 0.5426366998296861, "grad_norm": 0.1171875, "learning_rate": 0.001655503736182547, "loss": 1.2337, "step": 6188 }, { "epoch": 0.5427243916040607, "grad_norm": 0.13671875, "learning_rate": 0.0016550960540347823, "loss": 1.179, "step": 6189 }, { "epoch": 0.5428120833784352, "grad_norm": 0.05615234375, "learning_rate": 0.0016546883714222706, "loss": 1.241, "step": 6190 }, { "epoch": 0.5428997751528097, "grad_norm": 0.09326171875, "learning_rate": 0.0016542806883821913, "loss": 1.1632, "step": 6191 }, { "epoch": 0.5429874669271844, "grad_norm": 0.07080078125, "learning_rate": 0.0016538730049517243, "loss": 1.1938, "step": 6192 }, { "epoch": 0.5430751587015589, "grad_norm": 0.050537109375, "learning_rate": 0.001653465321168049, "loss": 1.1758, "step": 6193 }, { "epoch": 0.5431628504759334, "grad_norm": 0.061279296875, "learning_rate": 0.0016530576370683459, "loss": 1.2676, "step": 6194 }, { "epoch": 0.543250542250308, "grad_norm": 0.058349609375, "learning_rate": 0.0016526499526897935, "loss": 1.1886, "step": 6195 }, { "epoch": 0.5433382340246825, "grad_norm": 0.08349609375, "learning_rate": 0.001652242268069573, "loss": 1.2115, "step": 6196 }, { "epoch": 0.543425925799057, "grad_norm": 0.0673828125, "learning_rate": 0.0016518345832448628, "loss": 1.1913, "step": 6197 }, { "epoch": 0.5435136175734316, "grad_norm": 0.060302734375, "learning_rate": 0.0016514268982528442, "loss": 1.193, "step": 6198 }, { "epoch": 0.5436013093478061, "grad_norm": 0.052734375, "learning_rate": 0.001651019213130695, "loss": 1.17, "step": 6199 }, { "epoch": 0.5436890011221807, "grad_norm": 0.05419921875, "learning_rate": 0.0016506115279155968, "loss": 1.22, "step": 6200 }, { "epoch": 0.5437766928965552, "grad_norm": 0.054443359375, "learning_rate": 0.0016502038426447292, "loss": 1.1939, "step": 6201 }, { "epoch": 0.5438643846709298, "grad_norm": 0.072265625, "learning_rate": 0.0016497961573552712, "loss": 1.1717, "step": 6202 }, { "epoch": 0.5439520764453043, "grad_norm": 0.058349609375, "learning_rate": 0.0016493884720844034, "loss": 1.223, "step": 6203 }, { "epoch": 0.5440397682196788, "grad_norm": 0.09716796875, "learning_rate": 0.001648980786869305, "loss": 1.1651, "step": 6204 }, { "epoch": 0.5441274599940534, "grad_norm": 0.055419921875, "learning_rate": 0.0016485731017471567, "loss": 1.2385, "step": 6205 }, { "epoch": 0.5442151517684279, "grad_norm": 0.059814453125, "learning_rate": 0.0016481654167551372, "loss": 1.2179, "step": 6206 }, { "epoch": 0.5443028435428025, "grad_norm": 0.11767578125, "learning_rate": 0.0016477577319304274, "loss": 1.2117, "step": 6207 }, { "epoch": 0.5443905353171771, "grad_norm": 0.0595703125, "learning_rate": 0.0016473500473102062, "loss": 1.1888, "step": 6208 }, { "epoch": 0.5444782270915516, "grad_norm": 0.099609375, "learning_rate": 0.0016469423629316545, "loss": 1.2028, "step": 6209 }, { "epoch": 0.5445659188659261, "grad_norm": 0.0634765625, "learning_rate": 0.0016465346788319516, "loss": 1.1607, "step": 6210 }, { "epoch": 0.5446536106403007, "grad_norm": 0.07958984375, "learning_rate": 0.0016461269950482763, "loss": 1.1492, "step": 6211 }, { "epoch": 0.5447413024146752, "grad_norm": 0.07470703125, "learning_rate": 0.0016457193116178096, "loss": 1.1963, "step": 6212 }, { "epoch": 0.5448289941890497, "grad_norm": 0.0654296875, "learning_rate": 0.00164531162857773, "loss": 1.2261, "step": 6213 }, { "epoch": 0.5449166859634244, "grad_norm": 0.11083984375, "learning_rate": 0.0016449039459652183, "loss": 1.1495, "step": 6214 }, { "epoch": 0.5450043777377989, "grad_norm": 0.1142578125, "learning_rate": 0.0016444962638174534, "loss": 1.1951, "step": 6215 }, { "epoch": 0.5450920695121734, "grad_norm": 0.06201171875, "learning_rate": 0.0016440885821716147, "loss": 1.2331, "step": 6216 }, { "epoch": 0.545179761286548, "grad_norm": 0.1005859375, "learning_rate": 0.0016436809010648824, "loss": 1.2161, "step": 6217 }, { "epoch": 0.5452674530609225, "grad_norm": 0.056884765625, "learning_rate": 0.0016432732205344355, "loss": 1.2045, "step": 6218 }, { "epoch": 0.545355144835297, "grad_norm": 0.053466796875, "learning_rate": 0.0016428655406174539, "loss": 1.2037, "step": 6219 }, { "epoch": 0.5454428366096716, "grad_norm": 0.057373046875, "learning_rate": 0.001642457861351116, "loss": 1.1337, "step": 6220 }, { "epoch": 0.5455305283840461, "grad_norm": 0.048583984375, "learning_rate": 0.001642050182772602, "loss": 1.1929, "step": 6221 }, { "epoch": 0.5456182201584207, "grad_norm": 0.05029296875, "learning_rate": 0.0016416425049190905, "loss": 1.1688, "step": 6222 }, { "epoch": 0.5457059119327952, "grad_norm": 0.05126953125, "learning_rate": 0.0016412348278277615, "loss": 1.196, "step": 6223 }, { "epoch": 0.5457936037071698, "grad_norm": 0.0498046875, "learning_rate": 0.0016408271515357931, "loss": 1.1508, "step": 6224 }, { "epoch": 0.5458812954815443, "grad_norm": 0.048583984375, "learning_rate": 0.0016404194760803654, "loss": 1.1806, "step": 6225 }, { "epoch": 0.5459689872559188, "grad_norm": 0.058349609375, "learning_rate": 0.0016400118014986559, "loss": 1.1287, "step": 6226 }, { "epoch": 0.5460566790302934, "grad_norm": 0.05859375, "learning_rate": 0.0016396041278278455, "loss": 1.172, "step": 6227 }, { "epoch": 0.5461443708046679, "grad_norm": 0.0771484375, "learning_rate": 0.001639196455105111, "loss": 1.1998, "step": 6228 }, { "epoch": 0.5462320625790424, "grad_norm": 0.0478515625, "learning_rate": 0.0016387887833676318, "loss": 1.1833, "step": 6229 }, { "epoch": 0.5463197543534171, "grad_norm": 0.080078125, "learning_rate": 0.001638381112652587, "loss": 1.175, "step": 6230 }, { "epoch": 0.5464074461277916, "grad_norm": 0.07568359375, "learning_rate": 0.0016379734429971544, "loss": 1.1976, "step": 6231 }, { "epoch": 0.5464951379021661, "grad_norm": 0.1142578125, "learning_rate": 0.001637565774438513, "loss": 1.2004, "step": 6232 }, { "epoch": 0.5465828296765407, "grad_norm": 0.08544921875, "learning_rate": 0.0016371581070138408, "loss": 1.2174, "step": 6233 }, { "epoch": 0.5466705214509152, "grad_norm": 0.060302734375, "learning_rate": 0.0016367504407603162, "loss": 1.1645, "step": 6234 }, { "epoch": 0.5467582132252897, "grad_norm": 0.11328125, "learning_rate": 0.0016363427757151168, "loss": 1.2489, "step": 6235 }, { "epoch": 0.5468459049996643, "grad_norm": 0.054931640625, "learning_rate": 0.0016359351119154216, "loss": 1.1828, "step": 6236 }, { "epoch": 0.5469335967740389, "grad_norm": 0.06640625, "learning_rate": 0.001635527449398407, "loss": 1.1366, "step": 6237 }, { "epoch": 0.5470212885484134, "grad_norm": 0.06884765625, "learning_rate": 0.0016351197882012523, "loss": 1.1454, "step": 6238 }, { "epoch": 0.547108980322788, "grad_norm": 0.051025390625, "learning_rate": 0.0016347121283611342, "loss": 1.2568, "step": 6239 }, { "epoch": 0.5471966720971625, "grad_norm": 0.0498046875, "learning_rate": 0.0016343044699152311, "loss": 1.1857, "step": 6240 }, { "epoch": 0.547284363871537, "grad_norm": 0.08544921875, "learning_rate": 0.001633896812900719, "loss": 1.1532, "step": 6241 }, { "epoch": 0.5473720556459116, "grad_norm": 0.064453125, "learning_rate": 0.0016334891573547762, "loss": 1.1797, "step": 6242 }, { "epoch": 0.5474597474202861, "grad_norm": 0.05615234375, "learning_rate": 0.0016330815033145803, "loss": 1.2969, "step": 6243 }, { "epoch": 0.5475474391946606, "grad_norm": 0.052001953125, "learning_rate": 0.0016326738508173075, "loss": 1.2848, "step": 6244 }, { "epoch": 0.5476351309690353, "grad_norm": 0.056640625, "learning_rate": 0.0016322661999001345, "loss": 1.2477, "step": 6245 }, { "epoch": 0.5477228227434098, "grad_norm": 0.064453125, "learning_rate": 0.0016318585506002383, "loss": 1.1922, "step": 6246 }, { "epoch": 0.5478105145177843, "grad_norm": 0.0634765625, "learning_rate": 0.0016314509029547951, "loss": 1.2118, "step": 6247 }, { "epoch": 0.5478982062921588, "grad_norm": 0.08984375, "learning_rate": 0.0016310432570009823, "loss": 1.1672, "step": 6248 }, { "epoch": 0.5479858980665334, "grad_norm": 0.059814453125, "learning_rate": 0.0016306356127759752, "loss": 1.1371, "step": 6249 }, { "epoch": 0.5480735898409079, "grad_norm": 0.052001953125, "learning_rate": 0.0016302279703169502, "loss": 1.2152, "step": 6250 }, { "epoch": 0.5481612816152824, "grad_norm": 0.06884765625, "learning_rate": 0.0016298203296610829, "loss": 1.1606, "step": 6251 }, { "epoch": 0.5482489733896571, "grad_norm": 0.0693359375, "learning_rate": 0.0016294126908455503, "loss": 1.2029, "step": 6252 }, { "epoch": 0.5483366651640316, "grad_norm": 0.0703125, "learning_rate": 0.0016290050539075262, "loss": 1.2359, "step": 6253 }, { "epoch": 0.5484243569384061, "grad_norm": 0.09375, "learning_rate": 0.0016285974188841878, "loss": 1.2041, "step": 6254 }, { "epoch": 0.5485120487127807, "grad_norm": 0.0556640625, "learning_rate": 0.001628189785812709, "loss": 1.1968, "step": 6255 }, { "epoch": 0.5485997404871552, "grad_norm": 0.060302734375, "learning_rate": 0.0016277821547302656, "loss": 1.235, "step": 6256 }, { "epoch": 0.5486874322615297, "grad_norm": 0.08984375, "learning_rate": 0.0016273745256740323, "loss": 1.1694, "step": 6257 }, { "epoch": 0.5487751240359043, "grad_norm": 0.07080078125, "learning_rate": 0.001626966898681184, "loss": 1.1755, "step": 6258 }, { "epoch": 0.5488628158102788, "grad_norm": 0.10595703125, "learning_rate": 0.0016265592737888953, "loss": 1.1739, "step": 6259 }, { "epoch": 0.5489505075846534, "grad_norm": 0.109375, "learning_rate": 0.0016261516510343397, "loss": 1.2277, "step": 6260 }, { "epoch": 0.549038199359028, "grad_norm": 0.09228515625, "learning_rate": 0.0016257440304546927, "loss": 1.1765, "step": 6261 }, { "epoch": 0.5491258911334025, "grad_norm": 0.07275390625, "learning_rate": 0.0016253364120871275, "loss": 1.1667, "step": 6262 }, { "epoch": 0.549213582907777, "grad_norm": 0.05517578125, "learning_rate": 0.0016249287959688174, "loss": 1.225, "step": 6263 }, { "epoch": 0.5493012746821516, "grad_norm": 0.068359375, "learning_rate": 0.001624521182136936, "loss": 1.1772, "step": 6264 }, { "epoch": 0.5493889664565261, "grad_norm": 0.11669921875, "learning_rate": 0.0016241135706286578, "loss": 1.2173, "step": 6265 }, { "epoch": 0.5494766582309006, "grad_norm": 0.076171875, "learning_rate": 0.0016237059614811545, "loss": 1.172, "step": 6266 }, { "epoch": 0.5495643500052753, "grad_norm": 0.059814453125, "learning_rate": 0.0016232983547316002, "loss": 1.209, "step": 6267 }, { "epoch": 0.5496520417796498, "grad_norm": 0.053955078125, "learning_rate": 0.0016228907504171664, "loss": 1.1323, "step": 6268 }, { "epoch": 0.5497397335540243, "grad_norm": 0.06005859375, "learning_rate": 0.0016224831485750267, "loss": 1.1676, "step": 6269 }, { "epoch": 0.5498274253283988, "grad_norm": 0.080078125, "learning_rate": 0.0016220755492423527, "loss": 1.1769, "step": 6270 }, { "epoch": 0.5499151171027734, "grad_norm": 0.061279296875, "learning_rate": 0.001621667952456316, "loss": 1.1863, "step": 6271 }, { "epoch": 0.5500028088771479, "grad_norm": 0.07861328125, "learning_rate": 0.0016212603582540894, "loss": 1.2104, "step": 6272 }, { "epoch": 0.5500905006515224, "grad_norm": 0.0693359375, "learning_rate": 0.0016208527666728438, "loss": 1.2243, "step": 6273 }, { "epoch": 0.550178192425897, "grad_norm": 0.06201171875, "learning_rate": 0.001620445177749751, "loss": 1.1656, "step": 6274 }, { "epoch": 0.5502658842002716, "grad_norm": 0.05322265625, "learning_rate": 0.001620037591521981, "loss": 1.1798, "step": 6275 }, { "epoch": 0.5503535759746461, "grad_norm": 0.06396484375, "learning_rate": 0.001619630008026706, "loss": 1.2509, "step": 6276 }, { "epoch": 0.5504412677490207, "grad_norm": 0.053466796875, "learning_rate": 0.0016192224273010956, "loss": 1.2144, "step": 6277 }, { "epoch": 0.5505289595233952, "grad_norm": 0.052001953125, "learning_rate": 0.0016188148493823207, "loss": 1.2423, "step": 6278 }, { "epoch": 0.5506166512977697, "grad_norm": 0.057861328125, "learning_rate": 0.001618407274307551, "loss": 1.1792, "step": 6279 }, { "epoch": 0.5507043430721443, "grad_norm": 0.04833984375, "learning_rate": 0.0016179997021139555, "loss": 1.1725, "step": 6280 }, { "epoch": 0.5507920348465188, "grad_norm": 0.05712890625, "learning_rate": 0.0016175921328387053, "loss": 1.2296, "step": 6281 }, { "epoch": 0.5508797266208934, "grad_norm": 0.05615234375, "learning_rate": 0.0016171845665189687, "loss": 1.214, "step": 6282 }, { "epoch": 0.550967418395268, "grad_norm": 0.0498046875, "learning_rate": 0.001616777003191915, "loss": 1.228, "step": 6283 }, { "epoch": 0.5510551101696425, "grad_norm": 0.060546875, "learning_rate": 0.0016163694428947128, "loss": 1.1385, "step": 6284 }, { "epoch": 0.551142801944017, "grad_norm": 0.061767578125, "learning_rate": 0.0016159618856645306, "loss": 1.2321, "step": 6285 }, { "epoch": 0.5512304937183916, "grad_norm": 0.04931640625, "learning_rate": 0.0016155543315385362, "loss": 1.1448, "step": 6286 }, { "epoch": 0.5513181854927661, "grad_norm": 0.0732421875, "learning_rate": 0.0016151467805538982, "loss": 1.1906, "step": 6287 }, { "epoch": 0.5514058772671406, "grad_norm": 0.07177734375, "learning_rate": 0.001614739232747784, "loss": 1.1795, "step": 6288 }, { "epoch": 0.5514935690415153, "grad_norm": 0.09912109375, "learning_rate": 0.0016143316881573606, "loss": 1.1778, "step": 6289 }, { "epoch": 0.5515812608158898, "grad_norm": 0.06494140625, "learning_rate": 0.001613924146819795, "loss": 1.2161, "step": 6290 }, { "epoch": 0.5516689525902643, "grad_norm": 0.0712890625, "learning_rate": 0.0016135166087722541, "loss": 1.1597, "step": 6291 }, { "epoch": 0.5517566443646389, "grad_norm": 0.05126953125, "learning_rate": 0.0016131090740519045, "loss": 1.192, "step": 6292 }, { "epoch": 0.5518443361390134, "grad_norm": 0.06396484375, "learning_rate": 0.0016127015426959119, "loss": 1.153, "step": 6293 }, { "epoch": 0.5519320279133879, "grad_norm": 0.0712890625, "learning_rate": 0.0016122940147414422, "loss": 1.2313, "step": 6294 }, { "epoch": 0.5520197196877624, "grad_norm": 0.061767578125, "learning_rate": 0.0016118864902256618, "loss": 1.2929, "step": 6295 }, { "epoch": 0.552107411462137, "grad_norm": 0.056884765625, "learning_rate": 0.0016114789691857347, "loss": 1.2507, "step": 6296 }, { "epoch": 0.5521951032365116, "grad_norm": 0.0810546875, "learning_rate": 0.0016110714516588257, "loss": 1.1634, "step": 6297 }, { "epoch": 0.5522827950108861, "grad_norm": 0.054931640625, "learning_rate": 0.0016106639376821004, "loss": 1.2073, "step": 6298 }, { "epoch": 0.5523704867852607, "grad_norm": 0.0673828125, "learning_rate": 0.0016102564272927223, "loss": 1.2071, "step": 6299 }, { "epoch": 0.5524581785596352, "grad_norm": 0.09619140625, "learning_rate": 0.001609848920527855, "loss": 1.1927, "step": 6300 }, { "epoch": 0.5525458703340097, "grad_norm": 0.05517578125, "learning_rate": 0.0016094414174246628, "loss": 1.2397, "step": 6301 }, { "epoch": 0.5526335621083843, "grad_norm": 0.07666015625, "learning_rate": 0.0016090339180203084, "loss": 1.2688, "step": 6302 }, { "epoch": 0.5527212538827588, "grad_norm": 0.08447265625, "learning_rate": 0.0016086264223519554, "loss": 1.1718, "step": 6303 }, { "epoch": 0.5528089456571333, "grad_norm": 0.049560546875, "learning_rate": 0.0016082189304567657, "loss": 1.21, "step": 6304 }, { "epoch": 0.552896637431508, "grad_norm": 0.0703125, "learning_rate": 0.0016078114423719015, "loss": 1.1708, "step": 6305 }, { "epoch": 0.5529843292058825, "grad_norm": 0.06591796875, "learning_rate": 0.001607403958134525, "loss": 1.2078, "step": 6306 }, { "epoch": 0.553072020980257, "grad_norm": 0.07470703125, "learning_rate": 0.0016069964777817975, "loss": 1.2056, "step": 6307 }, { "epoch": 0.5531597127546316, "grad_norm": 0.09765625, "learning_rate": 0.0016065890013508802, "loss": 1.2215, "step": 6308 }, { "epoch": 0.5532474045290061, "grad_norm": 0.076171875, "learning_rate": 0.001606181528878934, "loss": 1.216, "step": 6309 }, { "epoch": 0.5533350963033806, "grad_norm": 0.09326171875, "learning_rate": 0.0016057740604031193, "loss": 1.2386, "step": 6310 }, { "epoch": 0.5534227880777552, "grad_norm": 0.06689453125, "learning_rate": 0.0016053665959605958, "loss": 1.1908, "step": 6311 }, { "epoch": 0.5535104798521298, "grad_norm": 0.05322265625, "learning_rate": 0.0016049591355885246, "loss": 1.156, "step": 6312 }, { "epoch": 0.5535981716265043, "grad_norm": 0.07177734375, "learning_rate": 0.0016045516793240626, "loss": 1.1992, "step": 6313 }, { "epoch": 0.5536858634008789, "grad_norm": 0.049560546875, "learning_rate": 0.001604144227204371, "loss": 1.1687, "step": 6314 }, { "epoch": 0.5537735551752534, "grad_norm": 0.0751953125, "learning_rate": 0.001603736779266607, "loss": 1.1995, "step": 6315 }, { "epoch": 0.5538612469496279, "grad_norm": 0.064453125, "learning_rate": 0.0016033293355479294, "loss": 1.2177, "step": 6316 }, { "epoch": 0.5539489387240025, "grad_norm": 0.05810546875, "learning_rate": 0.0016029218960854957, "loss": 1.1486, "step": 6317 }, { "epoch": 0.554036630498377, "grad_norm": 0.09033203125, "learning_rate": 0.001602514460916464, "loss": 1.1483, "step": 6318 }, { "epoch": 0.5541243222727515, "grad_norm": 0.07421875, "learning_rate": 0.0016021070300779906, "loss": 1.1721, "step": 6319 }, { "epoch": 0.5542120140471261, "grad_norm": 0.07470703125, "learning_rate": 0.0016016996036072326, "loss": 1.162, "step": 6320 }, { "epoch": 0.5542997058215007, "grad_norm": 0.10205078125, "learning_rate": 0.0016012921815413458, "loss": 1.2225, "step": 6321 }, { "epoch": 0.5543873975958752, "grad_norm": 0.06640625, "learning_rate": 0.0016008847639174867, "loss": 1.194, "step": 6322 }, { "epoch": 0.5544750893702497, "grad_norm": 0.06884765625, "learning_rate": 0.0016004773507728098, "loss": 1.149, "step": 6323 }, { "epoch": 0.5545627811446243, "grad_norm": 0.087890625, "learning_rate": 0.0016000699421444711, "loss": 1.2031, "step": 6324 }, { "epoch": 0.5546504729189988, "grad_norm": 0.0693359375, "learning_rate": 0.0015996625380696246, "loss": 1.2087, "step": 6325 }, { "epoch": 0.5547381646933733, "grad_norm": 0.0830078125, "learning_rate": 0.0015992551385854246, "loss": 1.1806, "step": 6326 }, { "epoch": 0.554825856467748, "grad_norm": 0.07177734375, "learning_rate": 0.001598847743729025, "loss": 1.1632, "step": 6327 }, { "epoch": 0.5549135482421225, "grad_norm": 0.0693359375, "learning_rate": 0.0015984403535375792, "loss": 1.2868, "step": 6328 }, { "epoch": 0.555001240016497, "grad_norm": 0.078125, "learning_rate": 0.0015980329680482406, "loss": 1.1877, "step": 6329 }, { "epoch": 0.5550889317908716, "grad_norm": 0.07373046875, "learning_rate": 0.001597625587298161, "loss": 1.2216, "step": 6330 }, { "epoch": 0.5551766235652461, "grad_norm": 0.05078125, "learning_rate": 0.001597218211324492, "loss": 1.1383, "step": 6331 }, { "epoch": 0.5552643153396206, "grad_norm": 0.06689453125, "learning_rate": 0.0015968108401643861, "loss": 1.1609, "step": 6332 }, { "epoch": 0.5553520071139952, "grad_norm": 0.07666015625, "learning_rate": 0.001596403473854994, "loss": 1.162, "step": 6333 }, { "epoch": 0.5554396988883697, "grad_norm": 0.09765625, "learning_rate": 0.0015959961124334674, "loss": 1.1883, "step": 6334 }, { "epoch": 0.5555273906627443, "grad_norm": 0.0537109375, "learning_rate": 0.0015955887559369556, "loss": 1.1254, "step": 6335 }, { "epoch": 0.5556150824371189, "grad_norm": 0.0908203125, "learning_rate": 0.0015951814044026086, "loss": 1.1932, "step": 6336 }, { "epoch": 0.5557027742114934, "grad_norm": 0.07568359375, "learning_rate": 0.0015947740578675764, "loss": 1.1882, "step": 6337 }, { "epoch": 0.5557904659858679, "grad_norm": 0.1025390625, "learning_rate": 0.0015943667163690074, "loss": 1.1862, "step": 6338 }, { "epoch": 0.5558781577602425, "grad_norm": 0.0947265625, "learning_rate": 0.0015939593799440502, "loss": 1.1493, "step": 6339 }, { "epoch": 0.555965849534617, "grad_norm": 0.055908203125, "learning_rate": 0.0015935520486298528, "loss": 1.2066, "step": 6340 }, { "epoch": 0.5560535413089915, "grad_norm": 0.12353515625, "learning_rate": 0.0015931447224635628, "loss": 1.2367, "step": 6341 }, { "epoch": 0.556141233083366, "grad_norm": 0.08544921875, "learning_rate": 0.0015927374014823273, "loss": 1.1762, "step": 6342 }, { "epoch": 0.5562289248577407, "grad_norm": 0.08349609375, "learning_rate": 0.0015923300857232931, "loss": 1.1653, "step": 6343 }, { "epoch": 0.5563166166321152, "grad_norm": 0.06787109375, "learning_rate": 0.0015919227752236066, "loss": 1.1891, "step": 6344 }, { "epoch": 0.5564043084064897, "grad_norm": 0.0693359375, "learning_rate": 0.0015915154700204123, "loss": 1.1844, "step": 6345 }, { "epoch": 0.5564920001808643, "grad_norm": 0.061767578125, "learning_rate": 0.001591108170150857, "loss": 1.1874, "step": 6346 }, { "epoch": 0.5565796919552388, "grad_norm": 0.046875, "learning_rate": 0.0015907008756520843, "loss": 1.2537, "step": 6347 }, { "epoch": 0.5566673837296133, "grad_norm": 0.06640625, "learning_rate": 0.0015902935865612384, "loss": 1.1783, "step": 6348 }, { "epoch": 0.556755075503988, "grad_norm": 0.051513671875, "learning_rate": 0.0015898863029154635, "loss": 1.1118, "step": 6349 }, { "epoch": 0.5568427672783625, "grad_norm": 0.06591796875, "learning_rate": 0.0015894790247519021, "loss": 1.2362, "step": 6350 }, { "epoch": 0.556930459052737, "grad_norm": 0.053955078125, "learning_rate": 0.0015890717521076976, "loss": 1.2775, "step": 6351 }, { "epoch": 0.5570181508271116, "grad_norm": 0.05615234375, "learning_rate": 0.0015886644850199923, "loss": 1.1843, "step": 6352 }, { "epoch": 0.5571058426014861, "grad_norm": 0.059326171875, "learning_rate": 0.0015882572235259276, "loss": 1.1587, "step": 6353 }, { "epoch": 0.5571935343758606, "grad_norm": 0.05859375, "learning_rate": 0.0015878499676626448, "loss": 1.2891, "step": 6354 }, { "epoch": 0.5572812261502352, "grad_norm": 0.058837890625, "learning_rate": 0.0015874427174672843, "loss": 1.2101, "step": 6355 }, { "epoch": 0.5573689179246097, "grad_norm": 0.06787109375, "learning_rate": 0.0015870354729769867, "loss": 1.2138, "step": 6356 }, { "epoch": 0.5574566096989843, "grad_norm": 0.06494140625, "learning_rate": 0.001586628234228891, "loss": 1.1904, "step": 6357 }, { "epoch": 0.5575443014733589, "grad_norm": 0.09765625, "learning_rate": 0.0015862210012601368, "loss": 1.1839, "step": 6358 }, { "epoch": 0.5576319932477334, "grad_norm": 0.11376953125, "learning_rate": 0.0015858137741078628, "loss": 1.1948, "step": 6359 }, { "epoch": 0.5577196850221079, "grad_norm": 0.058349609375, "learning_rate": 0.0015854065528092072, "loss": 1.153, "step": 6360 }, { "epoch": 0.5578073767964825, "grad_norm": 0.103515625, "learning_rate": 0.0015849993374013067, "loss": 1.1781, "step": 6361 }, { "epoch": 0.557895068570857, "grad_norm": 0.07861328125, "learning_rate": 0.001584592127921299, "loss": 1.2134, "step": 6362 }, { "epoch": 0.5579827603452315, "grad_norm": 0.0546875, "learning_rate": 0.0015841849244063214, "loss": 1.1946, "step": 6363 }, { "epoch": 0.5580704521196062, "grad_norm": 0.09375, "learning_rate": 0.0015837777268935076, "loss": 1.2231, "step": 6364 }, { "epoch": 0.5581581438939807, "grad_norm": 0.061279296875, "learning_rate": 0.0015833705354199943, "loss": 1.209, "step": 6365 }, { "epoch": 0.5582458356683552, "grad_norm": 0.0849609375, "learning_rate": 0.0015829633500229164, "loss": 1.168, "step": 6366 }, { "epoch": 0.5583335274427297, "grad_norm": 0.05908203125, "learning_rate": 0.0015825561707394078, "loss": 1.2113, "step": 6367 }, { "epoch": 0.5584212192171043, "grad_norm": 0.055419921875, "learning_rate": 0.0015821489976066024, "loss": 1.2056, "step": 6368 }, { "epoch": 0.5585089109914788, "grad_norm": 0.08251953125, "learning_rate": 0.0015817418306616332, "loss": 1.1898, "step": 6369 }, { "epoch": 0.5585966027658533, "grad_norm": 0.0615234375, "learning_rate": 0.0015813346699416328, "loss": 1.2063, "step": 6370 }, { "epoch": 0.5586842945402279, "grad_norm": 0.056640625, "learning_rate": 0.0015809275154837332, "loss": 1.1843, "step": 6371 }, { "epoch": 0.5587719863146025, "grad_norm": 0.0576171875, "learning_rate": 0.001580520367325066, "loss": 1.2443, "step": 6372 }, { "epoch": 0.558859678088977, "grad_norm": 0.072265625, "learning_rate": 0.0015801132255027618, "loss": 1.1903, "step": 6373 }, { "epoch": 0.5589473698633516, "grad_norm": 0.052978515625, "learning_rate": 0.001579706090053951, "loss": 1.1713, "step": 6374 }, { "epoch": 0.5590350616377261, "grad_norm": 0.06640625, "learning_rate": 0.0015792989610157637, "loss": 1.2142, "step": 6375 }, { "epoch": 0.5591227534121006, "grad_norm": 0.06201171875, "learning_rate": 0.0015788918384253283, "loss": 1.2035, "step": 6376 }, { "epoch": 0.5592104451864752, "grad_norm": 0.06591796875, "learning_rate": 0.0015784847223197734, "loss": 1.2559, "step": 6377 }, { "epoch": 0.5592981369608497, "grad_norm": 0.10107421875, "learning_rate": 0.001578077612736228, "loss": 1.1922, "step": 6378 }, { "epoch": 0.5593858287352242, "grad_norm": 0.04833984375, "learning_rate": 0.001577670509711818, "loss": 1.224, "step": 6379 }, { "epoch": 0.5594735205095989, "grad_norm": 0.0634765625, "learning_rate": 0.0015772634132836714, "loss": 1.2317, "step": 6380 }, { "epoch": 0.5595612122839734, "grad_norm": 0.099609375, "learning_rate": 0.0015768563234889134, "loss": 1.2015, "step": 6381 }, { "epoch": 0.5596489040583479, "grad_norm": 0.05908203125, "learning_rate": 0.0015764492403646697, "loss": 1.2169, "step": 6382 }, { "epoch": 0.5597365958327225, "grad_norm": 0.060302734375, "learning_rate": 0.0015760421639480656, "loss": 1.2163, "step": 6383 }, { "epoch": 0.559824287607097, "grad_norm": 0.05126953125, "learning_rate": 0.001575635094276225, "loss": 1.2011, "step": 6384 }, { "epoch": 0.5599119793814715, "grad_norm": 0.049560546875, "learning_rate": 0.0015752280313862722, "loss": 1.2288, "step": 6385 }, { "epoch": 0.5599996711558461, "grad_norm": 0.053466796875, "learning_rate": 0.0015748209753153294, "loss": 1.1885, "step": 6386 }, { "epoch": 0.5600873629302207, "grad_norm": 0.062255859375, "learning_rate": 0.0015744139261005203, "loss": 1.197, "step": 6387 }, { "epoch": 0.5601750547045952, "grad_norm": 0.05517578125, "learning_rate": 0.0015740068837789653, "loss": 1.2942, "step": 6388 }, { "epoch": 0.5602627464789698, "grad_norm": 0.0595703125, "learning_rate": 0.0015735998483877869, "loss": 1.1611, "step": 6389 }, { "epoch": 0.5603504382533443, "grad_norm": 0.08251953125, "learning_rate": 0.0015731928199641044, "loss": 1.1487, "step": 6390 }, { "epoch": 0.5604381300277188, "grad_norm": 0.08154296875, "learning_rate": 0.0015727857985450393, "loss": 1.2072, "step": 6391 }, { "epoch": 0.5605258218020933, "grad_norm": 0.057861328125, "learning_rate": 0.0015723787841677094, "loss": 1.1653, "step": 6392 }, { "epoch": 0.5606135135764679, "grad_norm": 0.1142578125, "learning_rate": 0.0015719717768692342, "loss": 1.2136, "step": 6393 }, { "epoch": 0.5607012053508424, "grad_norm": 0.08642578125, "learning_rate": 0.0015715647766867318, "loss": 1.1893, "step": 6394 }, { "epoch": 0.560788897125217, "grad_norm": 0.0712890625, "learning_rate": 0.0015711577836573188, "loss": 1.2301, "step": 6395 }, { "epoch": 0.5608765888995916, "grad_norm": 0.0927734375, "learning_rate": 0.0015707507978181129, "loss": 1.2091, "step": 6396 }, { "epoch": 0.5609642806739661, "grad_norm": 0.0634765625, "learning_rate": 0.0015703438192062294, "loss": 1.187, "step": 6397 }, { "epoch": 0.5610519724483406, "grad_norm": 0.059326171875, "learning_rate": 0.0015699368478587841, "loss": 1.1577, "step": 6398 }, { "epoch": 0.5611396642227152, "grad_norm": 0.06201171875, "learning_rate": 0.0015695298838128916, "loss": 1.2093, "step": 6399 }, { "epoch": 0.5612273559970897, "grad_norm": 0.0927734375, "learning_rate": 0.0015691229271056657, "loss": 1.1618, "step": 6400 }, { "epoch": 0.5613150477714642, "grad_norm": 0.07373046875, "learning_rate": 0.0015687159777742201, "loss": 1.1999, "step": 6401 }, { "epoch": 0.5614027395458389, "grad_norm": 0.0634765625, "learning_rate": 0.0015683090358556675, "loss": 1.2146, "step": 6402 }, { "epoch": 0.5614904313202134, "grad_norm": 0.07275390625, "learning_rate": 0.0015679021013871202, "loss": 1.24, "step": 6403 }, { "epoch": 0.5615781230945879, "grad_norm": 0.076171875, "learning_rate": 0.001567495174405689, "loss": 1.215, "step": 6404 }, { "epoch": 0.5616658148689625, "grad_norm": 0.06396484375, "learning_rate": 0.001567088254948485, "loss": 1.2627, "step": 6405 }, { "epoch": 0.561753506643337, "grad_norm": 0.0625, "learning_rate": 0.001566681343052618, "loss": 1.2215, "step": 6406 }, { "epoch": 0.5618411984177115, "grad_norm": 0.064453125, "learning_rate": 0.0015662744387551976, "loss": 1.1994, "step": 6407 }, { "epoch": 0.5619288901920861, "grad_norm": 0.05224609375, "learning_rate": 0.0015658675420933321, "loss": 1.1595, "step": 6408 }, { "epoch": 0.5620165819664606, "grad_norm": 0.068359375, "learning_rate": 0.00156546065310413, "loss": 1.1884, "step": 6409 }, { "epoch": 0.5621042737408352, "grad_norm": 0.057373046875, "learning_rate": 0.0015650537718246977, "loss": 1.2043, "step": 6410 }, { "epoch": 0.5621919655152098, "grad_norm": 0.0615234375, "learning_rate": 0.0015646468982921423, "loss": 1.1482, "step": 6411 }, { "epoch": 0.5622796572895843, "grad_norm": 0.057373046875, "learning_rate": 0.0015642400325435693, "loss": 1.2052, "step": 6412 }, { "epoch": 0.5623673490639588, "grad_norm": 0.06298828125, "learning_rate": 0.001563833174616084, "loss": 1.1962, "step": 6413 }, { "epoch": 0.5624550408383333, "grad_norm": 0.08642578125, "learning_rate": 0.001563426324546791, "loss": 1.2828, "step": 6414 }, { "epoch": 0.5625427326127079, "grad_norm": 0.055908203125, "learning_rate": 0.0015630194823727935, "loss": 1.2512, "step": 6415 }, { "epoch": 0.5626304243870824, "grad_norm": 0.06201171875, "learning_rate": 0.0015626126481311949, "loss": 1.1512, "step": 6416 }, { "epoch": 0.562718116161457, "grad_norm": 0.0654296875, "learning_rate": 0.0015622058218590967, "loss": 1.2233, "step": 6417 }, { "epoch": 0.5628058079358316, "grad_norm": 0.08447265625, "learning_rate": 0.0015617990035936011, "loss": 1.1962, "step": 6418 }, { "epoch": 0.5628934997102061, "grad_norm": 0.056640625, "learning_rate": 0.0015613921933718088, "loss": 1.2397, "step": 6419 }, { "epoch": 0.5629811914845806, "grad_norm": 0.05419921875, "learning_rate": 0.0015609853912308196, "loss": 1.2184, "step": 6420 }, { "epoch": 0.5630688832589552, "grad_norm": 0.05419921875, "learning_rate": 0.0015605785972077329, "loss": 1.1528, "step": 6421 }, { "epoch": 0.5631565750333297, "grad_norm": 0.054443359375, "learning_rate": 0.0015601718113396475, "loss": 1.2188, "step": 6422 }, { "epoch": 0.5632442668077042, "grad_norm": 0.051513671875, "learning_rate": 0.0015597650336636607, "loss": 1.1845, "step": 6423 }, { "epoch": 0.5633319585820789, "grad_norm": 0.09033203125, "learning_rate": 0.00155935826421687, "loss": 1.1497, "step": 6424 }, { "epoch": 0.5634196503564534, "grad_norm": 0.064453125, "learning_rate": 0.0015589515030363715, "loss": 1.2438, "step": 6425 }, { "epoch": 0.5635073421308279, "grad_norm": 0.0595703125, "learning_rate": 0.0015585447501592613, "loss": 1.1254, "step": 6426 }, { "epoch": 0.5635950339052025, "grad_norm": 0.07373046875, "learning_rate": 0.0015581380056226334, "loss": 1.201, "step": 6427 }, { "epoch": 0.563682725679577, "grad_norm": 0.049560546875, "learning_rate": 0.0015577312694635824, "loss": 1.1229, "step": 6428 }, { "epoch": 0.5637704174539515, "grad_norm": 0.056640625, "learning_rate": 0.0015573245417192015, "loss": 1.1525, "step": 6429 }, { "epoch": 0.5638581092283261, "grad_norm": 0.05322265625, "learning_rate": 0.001556917822426583, "loss": 1.1542, "step": 6430 }, { "epoch": 0.5639458010027006, "grad_norm": 0.060546875, "learning_rate": 0.0015565111116228197, "loss": 1.2266, "step": 6431 }, { "epoch": 0.5640334927770752, "grad_norm": 0.0625, "learning_rate": 0.0015561044093450006, "loss": 1.1664, "step": 6432 }, { "epoch": 0.5641211845514498, "grad_norm": 0.05908203125, "learning_rate": 0.0015556977156302172, "loss": 1.247, "step": 6433 }, { "epoch": 0.5642088763258243, "grad_norm": 0.05126953125, "learning_rate": 0.0015552910305155587, "loss": 1.157, "step": 6434 }, { "epoch": 0.5642965681001988, "grad_norm": 0.0654296875, "learning_rate": 0.0015548843540381136, "loss": 1.1882, "step": 6435 }, { "epoch": 0.5643842598745734, "grad_norm": 0.05419921875, "learning_rate": 0.0015544776862349704, "loss": 1.1823, "step": 6436 }, { "epoch": 0.5644719516489479, "grad_norm": 0.0625, "learning_rate": 0.001554071027143215, "loss": 1.1976, "step": 6437 }, { "epoch": 0.5645596434233224, "grad_norm": 0.06689453125, "learning_rate": 0.0015536643767999347, "loss": 1.2089, "step": 6438 }, { "epoch": 0.5646473351976969, "grad_norm": 0.05224609375, "learning_rate": 0.0015532577352422143, "loss": 1.2329, "step": 6439 }, { "epoch": 0.5647350269720716, "grad_norm": 0.08251953125, "learning_rate": 0.0015528511025071387, "loss": 1.1917, "step": 6440 }, { "epoch": 0.5648227187464461, "grad_norm": 0.0615234375, "learning_rate": 0.001552444478631792, "loss": 1.1913, "step": 6441 }, { "epoch": 0.5649104105208206, "grad_norm": 0.061767578125, "learning_rate": 0.001552037863653257, "loss": 1.1706, "step": 6442 }, { "epoch": 0.5649981022951952, "grad_norm": 0.078125, "learning_rate": 0.0015516312576086158, "loss": 1.1696, "step": 6443 }, { "epoch": 0.5650857940695697, "grad_norm": 0.048828125, "learning_rate": 0.0015512246605349502, "loss": 1.2054, "step": 6444 }, { "epoch": 0.5651734858439442, "grad_norm": 0.0546875, "learning_rate": 0.0015508180724693398, "loss": 1.1876, "step": 6445 }, { "epoch": 0.5652611776183188, "grad_norm": 0.0595703125, "learning_rate": 0.0015504114934488662, "loss": 1.1909, "step": 6446 }, { "epoch": 0.5653488693926934, "grad_norm": 0.05419921875, "learning_rate": 0.0015500049235106068, "loss": 1.1598, "step": 6447 }, { "epoch": 0.5654365611670679, "grad_norm": 0.05322265625, "learning_rate": 0.0015495983626916403, "loss": 1.1926, "step": 6448 }, { "epoch": 0.5655242529414425, "grad_norm": 0.0498046875, "learning_rate": 0.0015491918110290444, "loss": 1.1784, "step": 6449 }, { "epoch": 0.565611944715817, "grad_norm": 0.068359375, "learning_rate": 0.0015487852685598942, "loss": 1.2205, "step": 6450 }, { "epoch": 0.5656996364901915, "grad_norm": 0.0517578125, "learning_rate": 0.0015483787353212668, "loss": 1.1708, "step": 6451 }, { "epoch": 0.5657873282645661, "grad_norm": 0.107421875, "learning_rate": 0.001547972211350236, "loss": 1.2142, "step": 6452 }, { "epoch": 0.5658750200389406, "grad_norm": 0.05029296875, "learning_rate": 0.001547565696683877, "loss": 1.2007, "step": 6453 }, { "epoch": 0.5659627118133151, "grad_norm": 0.10546875, "learning_rate": 0.001547159191359261, "loss": 1.2165, "step": 6454 }, { "epoch": 0.5660504035876898, "grad_norm": 0.05908203125, "learning_rate": 0.0015467526954134616, "loss": 1.2148, "step": 6455 }, { "epoch": 0.5661380953620643, "grad_norm": 0.0859375, "learning_rate": 0.0015463462088835499, "loss": 1.1984, "step": 6456 }, { "epoch": 0.5662257871364388, "grad_norm": 0.09619140625, "learning_rate": 0.0015459397318065965, "loss": 1.1577, "step": 6457 }, { "epoch": 0.5663134789108134, "grad_norm": 0.08984375, "learning_rate": 0.0015455332642196709, "loss": 1.1741, "step": 6458 }, { "epoch": 0.5664011706851879, "grad_norm": 0.0732421875, "learning_rate": 0.0015451268061598415, "loss": 1.2214, "step": 6459 }, { "epoch": 0.5664888624595624, "grad_norm": 0.072265625, "learning_rate": 0.0015447203576641772, "loss": 1.1945, "step": 6460 }, { "epoch": 0.5665765542339369, "grad_norm": 0.07861328125, "learning_rate": 0.001544313918769744, "loss": 1.2126, "step": 6461 }, { "epoch": 0.5666642460083116, "grad_norm": 0.1650390625, "learning_rate": 0.001543907489513609, "loss": 1.2051, "step": 6462 }, { "epoch": 0.5667519377826861, "grad_norm": 0.09619140625, "learning_rate": 0.001543501069932837, "loss": 1.2025, "step": 6463 }, { "epoch": 0.5668396295570606, "grad_norm": 0.1162109375, "learning_rate": 0.0015430946600644925, "loss": 1.1974, "step": 6464 }, { "epoch": 0.5669273213314352, "grad_norm": 0.09765625, "learning_rate": 0.0015426882599456394, "loss": 1.2762, "step": 6465 }, { "epoch": 0.5670150131058097, "grad_norm": 0.091796875, "learning_rate": 0.0015422818696133394, "loss": 1.1344, "step": 6466 }, { "epoch": 0.5671027048801842, "grad_norm": 0.1357421875, "learning_rate": 0.001541875489104655, "loss": 1.2072, "step": 6467 }, { "epoch": 0.5671903966545588, "grad_norm": 0.10546875, "learning_rate": 0.001541469118456647, "loss": 1.2383, "step": 6468 }, { "epoch": 0.5672780884289333, "grad_norm": 0.1337890625, "learning_rate": 0.0015410627577063752, "loss": 1.1938, "step": 6469 }, { "epoch": 0.5673657802033079, "grad_norm": 0.109375, "learning_rate": 0.001540656406890898, "loss": 1.1509, "step": 6470 }, { "epoch": 0.5674534719776825, "grad_norm": 0.0810546875, "learning_rate": 0.0015402500660472752, "loss": 1.1804, "step": 6471 }, { "epoch": 0.567541163752057, "grad_norm": 0.09765625, "learning_rate": 0.0015398437352125624, "loss": 1.2559, "step": 6472 }, { "epoch": 0.5676288555264315, "grad_norm": 0.10888671875, "learning_rate": 0.0015394374144238167, "loss": 1.1304, "step": 6473 }, { "epoch": 0.5677165473008061, "grad_norm": 0.0634765625, "learning_rate": 0.0015390311037180935, "loss": 1.1556, "step": 6474 }, { "epoch": 0.5678042390751806, "grad_norm": 0.052734375, "learning_rate": 0.0015386248031324474, "loss": 1.2176, "step": 6475 }, { "epoch": 0.5678919308495551, "grad_norm": 0.08544921875, "learning_rate": 0.0015382185127039312, "loss": 1.1446, "step": 6476 }, { "epoch": 0.5679796226239298, "grad_norm": 0.1025390625, "learning_rate": 0.0015378122324695989, "loss": 1.1888, "step": 6477 }, { "epoch": 0.5680673143983043, "grad_norm": 0.054931640625, "learning_rate": 0.0015374059624665005, "loss": 1.1614, "step": 6478 }, { "epoch": 0.5681550061726788, "grad_norm": 0.06591796875, "learning_rate": 0.0015369997027316884, "loss": 1.1972, "step": 6479 }, { "epoch": 0.5682426979470534, "grad_norm": 0.058349609375, "learning_rate": 0.001536593453302211, "loss": 1.1357, "step": 6480 }, { "epoch": 0.5683303897214279, "grad_norm": 0.07666015625, "learning_rate": 0.0015361872142151186, "loss": 1.2006, "step": 6481 }, { "epoch": 0.5684180814958024, "grad_norm": 0.052490234375, "learning_rate": 0.0015357809855074582, "loss": 1.1723, "step": 6482 }, { "epoch": 0.568505773270177, "grad_norm": 0.11474609375, "learning_rate": 0.001535374767216277, "loss": 1.1958, "step": 6483 }, { "epoch": 0.5685934650445515, "grad_norm": 0.0703125, "learning_rate": 0.0015349685593786214, "loss": 1.1226, "step": 6484 }, { "epoch": 0.5686811568189261, "grad_norm": 0.053955078125, "learning_rate": 0.0015345623620315356, "loss": 1.1709, "step": 6485 }, { "epoch": 0.5687688485933006, "grad_norm": 0.07763671875, "learning_rate": 0.001534156175212065, "loss": 1.245, "step": 6486 }, { "epoch": 0.5688565403676752, "grad_norm": 0.07568359375, "learning_rate": 0.0015337499989572518, "loss": 1.1888, "step": 6487 }, { "epoch": 0.5689442321420497, "grad_norm": 0.053466796875, "learning_rate": 0.0015333438333041383, "loss": 1.2331, "step": 6488 }, { "epoch": 0.5690319239164242, "grad_norm": 0.058349609375, "learning_rate": 0.0015329376782897666, "loss": 1.2039, "step": 6489 }, { "epoch": 0.5691196156907988, "grad_norm": 0.055908203125, "learning_rate": 0.001532531533951176, "loss": 1.1693, "step": 6490 }, { "epoch": 0.5692073074651733, "grad_norm": 0.053955078125, "learning_rate": 0.0015321254003254068, "loss": 1.147, "step": 6491 }, { "epoch": 0.5692949992395478, "grad_norm": 0.060791015625, "learning_rate": 0.0015317192774494963, "loss": 1.1743, "step": 6492 }, { "epoch": 0.5693826910139225, "grad_norm": 0.052734375, "learning_rate": 0.0015313131653604828, "loss": 1.2275, "step": 6493 }, { "epoch": 0.569470382788297, "grad_norm": 0.05859375, "learning_rate": 0.001530907064095402, "loss": 1.187, "step": 6494 }, { "epoch": 0.5695580745626715, "grad_norm": 0.05810546875, "learning_rate": 0.0015305009736912897, "loss": 1.1531, "step": 6495 }, { "epoch": 0.5696457663370461, "grad_norm": 0.052490234375, "learning_rate": 0.0015300948941851797, "loss": 1.2283, "step": 6496 }, { "epoch": 0.5697334581114206, "grad_norm": 0.076171875, "learning_rate": 0.0015296888256141061, "loss": 1.1836, "step": 6497 }, { "epoch": 0.5698211498857951, "grad_norm": 0.052490234375, "learning_rate": 0.001529282768015101, "loss": 1.1491, "step": 6498 }, { "epoch": 0.5699088416601698, "grad_norm": 0.045166015625, "learning_rate": 0.0015288767214251965, "loss": 1.1279, "step": 6499 }, { "epoch": 0.5699965334345443, "grad_norm": 0.05224609375, "learning_rate": 0.0015284706858814214, "loss": 1.1854, "step": 6500 }, { "epoch": 0.5699965334345443, "eval_loss": 1.2018574476242065, "eval_runtime": 428.7209, "eval_samples_per_second": 33.698, "eval_steps_per_second": 8.425, "step": 6500 }, { "epoch": 0.5700842252089188, "grad_norm": 0.080078125, "learning_rate": 0.0015280646614208061, "loss": 1.1821, "step": 6501 }, { "epoch": 0.5701719169832934, "grad_norm": 0.10205078125, "learning_rate": 0.0015276586480803794, "loss": 1.1674, "step": 6502 }, { "epoch": 0.5702596087576679, "grad_norm": 0.10888671875, "learning_rate": 0.0015272526458971678, "loss": 1.2094, "step": 6503 }, { "epoch": 0.5703473005320424, "grad_norm": 0.11669921875, "learning_rate": 0.001526846654908198, "loss": 1.1393, "step": 6504 }, { "epoch": 0.570434992306417, "grad_norm": 0.068359375, "learning_rate": 0.0015264406751504952, "loss": 1.1569, "step": 6505 }, { "epoch": 0.5705226840807915, "grad_norm": 0.1298828125, "learning_rate": 0.0015260347066610841, "loss": 1.1828, "step": 6506 }, { "epoch": 0.570610375855166, "grad_norm": 0.09814453125, "learning_rate": 0.0015256287494769879, "loss": 1.2321, "step": 6507 }, { "epoch": 0.5706980676295407, "grad_norm": 0.06689453125, "learning_rate": 0.0015252228036352288, "loss": 1.2248, "step": 6508 }, { "epoch": 0.5707857594039152, "grad_norm": 0.0546875, "learning_rate": 0.0015248168691728273, "loss": 1.1061, "step": 6509 }, { "epoch": 0.5708734511782897, "grad_norm": 0.0771484375, "learning_rate": 0.0015244109461268046, "loss": 1.201, "step": 6510 }, { "epoch": 0.5709611429526642, "grad_norm": 0.0537109375, "learning_rate": 0.0015240050345341793, "loss": 1.238, "step": 6511 }, { "epoch": 0.5710488347270388, "grad_norm": 0.12060546875, "learning_rate": 0.001523599134431969, "loss": 1.2105, "step": 6512 }, { "epoch": 0.5711365265014133, "grad_norm": 0.07275390625, "learning_rate": 0.0015231932458571924, "loss": 1.1544, "step": 6513 }, { "epoch": 0.5712242182757878, "grad_norm": 0.05517578125, "learning_rate": 0.0015227873688468637, "loss": 1.0975, "step": 6514 }, { "epoch": 0.5713119100501625, "grad_norm": 0.10595703125, "learning_rate": 0.0015223815034379991, "loss": 1.2065, "step": 6515 }, { "epoch": 0.571399601824537, "grad_norm": 0.08935546875, "learning_rate": 0.001521975649667612, "loss": 1.182, "step": 6516 }, { "epoch": 0.5714872935989115, "grad_norm": 0.0625, "learning_rate": 0.0015215698075727146, "loss": 1.2094, "step": 6517 }, { "epoch": 0.5715749853732861, "grad_norm": 0.0556640625, "learning_rate": 0.0015211639771903196, "loss": 1.2665, "step": 6518 }, { "epoch": 0.5716626771476606, "grad_norm": 0.0673828125, "learning_rate": 0.001520758158557437, "loss": 1.161, "step": 6519 }, { "epoch": 0.5717503689220351, "grad_norm": 0.05029296875, "learning_rate": 0.001520352351711077, "loss": 1.2092, "step": 6520 }, { "epoch": 0.5718380606964097, "grad_norm": 0.0517578125, "learning_rate": 0.0015199465566882475, "loss": 1.1835, "step": 6521 }, { "epoch": 0.5719257524707843, "grad_norm": 0.05517578125, "learning_rate": 0.0015195407735259564, "loss": 1.1659, "step": 6522 }, { "epoch": 0.5720134442451588, "grad_norm": 0.053955078125, "learning_rate": 0.0015191350022612101, "loss": 1.2167, "step": 6523 }, { "epoch": 0.5721011360195334, "grad_norm": 0.047119140625, "learning_rate": 0.0015187292429310138, "loss": 1.1726, "step": 6524 }, { "epoch": 0.5721888277939079, "grad_norm": 0.054443359375, "learning_rate": 0.0015183234955723714, "loss": 1.1625, "step": 6525 }, { "epoch": 0.5722765195682824, "grad_norm": 0.06298828125, "learning_rate": 0.0015179177602222865, "loss": 1.2316, "step": 6526 }, { "epoch": 0.572364211342657, "grad_norm": 0.056884765625, "learning_rate": 0.0015175120369177607, "loss": 1.2008, "step": 6527 }, { "epoch": 0.5724519031170315, "grad_norm": 0.06494140625, "learning_rate": 0.0015171063256957957, "loss": 1.1753, "step": 6528 }, { "epoch": 0.572539594891406, "grad_norm": 0.06982421875, "learning_rate": 0.0015167006265933898, "loss": 1.2557, "step": 6529 }, { "epoch": 0.5726272866657807, "grad_norm": 0.0537109375, "learning_rate": 0.0015162949396475432, "loss": 1.1326, "step": 6530 }, { "epoch": 0.5727149784401552, "grad_norm": 0.06396484375, "learning_rate": 0.0015158892648952527, "loss": 1.1204, "step": 6531 }, { "epoch": 0.5728026702145297, "grad_norm": 0.08203125, "learning_rate": 0.0015154836023735156, "loss": 1.2554, "step": 6532 }, { "epoch": 0.5728903619889042, "grad_norm": 0.072265625, "learning_rate": 0.0015150779521193266, "loss": 1.1393, "step": 6533 }, { "epoch": 0.5729780537632788, "grad_norm": 0.10888671875, "learning_rate": 0.0015146723141696797, "loss": 1.2406, "step": 6534 }, { "epoch": 0.5730657455376533, "grad_norm": 0.0771484375, "learning_rate": 0.0015142666885615689, "loss": 1.2451, "step": 6535 }, { "epoch": 0.5731534373120278, "grad_norm": 0.07861328125, "learning_rate": 0.001513861075331985, "loss": 1.162, "step": 6536 }, { "epoch": 0.5732411290864025, "grad_norm": 0.1025390625, "learning_rate": 0.0015134554745179199, "loss": 1.1696, "step": 6537 }, { "epoch": 0.573328820860777, "grad_norm": 0.05859375, "learning_rate": 0.0015130498861563632, "loss": 1.1485, "step": 6538 }, { "epoch": 0.5734165126351515, "grad_norm": 0.053466796875, "learning_rate": 0.001512644310284303, "loss": 1.3001, "step": 6539 }, { "epoch": 0.5735042044095261, "grad_norm": 0.078125, "learning_rate": 0.0015122387469387272, "loss": 1.1902, "step": 6540 }, { "epoch": 0.5735918961839006, "grad_norm": 0.0615234375, "learning_rate": 0.001511833196156622, "loss": 1.1898, "step": 6541 }, { "epoch": 0.5736795879582751, "grad_norm": 0.05810546875, "learning_rate": 0.001511427657974973, "loss": 1.1731, "step": 6542 }, { "epoch": 0.5737672797326497, "grad_norm": 0.056640625, "learning_rate": 0.0015110221324307635, "loss": 1.21, "step": 6543 }, { "epoch": 0.5738549715070242, "grad_norm": 0.06640625, "learning_rate": 0.001510616619560977, "loss": 1.1595, "step": 6544 }, { "epoch": 0.5739426632813988, "grad_norm": 0.04833984375, "learning_rate": 0.0015102111194025948, "loss": 1.188, "step": 6545 }, { "epoch": 0.5740303550557734, "grad_norm": 0.06982421875, "learning_rate": 0.001509805631992598, "loss": 1.2177, "step": 6546 }, { "epoch": 0.5741180468301479, "grad_norm": 0.05078125, "learning_rate": 0.0015094001573679654, "loss": 1.2031, "step": 6547 }, { "epoch": 0.5742057386045224, "grad_norm": 0.05810546875, "learning_rate": 0.0015089946955656752, "loss": 1.1625, "step": 6548 }, { "epoch": 0.574293430378897, "grad_norm": 0.052978515625, "learning_rate": 0.0015085892466227058, "loss": 1.134, "step": 6549 }, { "epoch": 0.5743811221532715, "grad_norm": 0.091796875, "learning_rate": 0.001508183810576031, "loss": 1.224, "step": 6550 }, { "epoch": 0.574468813927646, "grad_norm": 0.05322265625, "learning_rate": 0.001507778387462627, "loss": 1.2006, "step": 6551 }, { "epoch": 0.5745565057020207, "grad_norm": 0.072265625, "learning_rate": 0.0015073729773194666, "loss": 1.1596, "step": 6552 }, { "epoch": 0.5746441974763952, "grad_norm": 0.06298828125, "learning_rate": 0.0015069675801835229, "loss": 1.1768, "step": 6553 }, { "epoch": 0.5747318892507697, "grad_norm": 0.049072265625, "learning_rate": 0.0015065621960917664, "loss": 1.1401, "step": 6554 }, { "epoch": 0.5748195810251443, "grad_norm": 0.0693359375, "learning_rate": 0.0015061568250811675, "loss": 1.1616, "step": 6555 }, { "epoch": 0.5749072727995188, "grad_norm": 0.051513671875, "learning_rate": 0.0015057514671886947, "loss": 1.1627, "step": 6556 }, { "epoch": 0.5749949645738933, "grad_norm": 0.06103515625, "learning_rate": 0.0015053461224513157, "loss": 1.2626, "step": 6557 }, { "epoch": 0.5750826563482678, "grad_norm": 0.0625, "learning_rate": 0.0015049407909059967, "loss": 1.2115, "step": 6558 }, { "epoch": 0.5751703481226424, "grad_norm": 0.054443359375, "learning_rate": 0.0015045354725897035, "loss": 1.1681, "step": 6559 }, { "epoch": 0.575258039897017, "grad_norm": 0.07568359375, "learning_rate": 0.001504130167539399, "loss": 1.1701, "step": 6560 }, { "epoch": 0.5753457316713915, "grad_norm": 0.0517578125, "learning_rate": 0.001503724875792048, "loss": 1.1204, "step": 6561 }, { "epoch": 0.5754334234457661, "grad_norm": 0.062255859375, "learning_rate": 0.0015033195973846094, "loss": 1.1467, "step": 6562 }, { "epoch": 0.5755211152201406, "grad_norm": 0.10498046875, "learning_rate": 0.0015029143323540453, "loss": 1.1843, "step": 6563 }, { "epoch": 0.5756088069945151, "grad_norm": 0.08203125, "learning_rate": 0.0015025090807373146, "loss": 1.2083, "step": 6564 }, { "epoch": 0.5756964987688897, "grad_norm": 0.08740234375, "learning_rate": 0.0015021038425713745, "loss": 1.2307, "step": 6565 }, { "epoch": 0.5757841905432642, "grad_norm": 0.056396484375, "learning_rate": 0.0015016986178931835, "loss": 1.1813, "step": 6566 }, { "epoch": 0.5758718823176388, "grad_norm": 0.061279296875, "learning_rate": 0.0015012934067396946, "loss": 1.1761, "step": 6567 }, { "epoch": 0.5759595740920134, "grad_norm": 0.134765625, "learning_rate": 0.0015008882091478626, "loss": 1.2085, "step": 6568 }, { "epoch": 0.5760472658663879, "grad_norm": 0.04931640625, "learning_rate": 0.001500483025154642, "loss": 1.1942, "step": 6569 }, { "epoch": 0.5761349576407624, "grad_norm": 0.1552734375, "learning_rate": 0.001500077854796983, "loss": 1.2157, "step": 6570 }, { "epoch": 0.576222649415137, "grad_norm": 0.109375, "learning_rate": 0.0014996726981118369, "loss": 1.2117, "step": 6571 }, { "epoch": 0.5763103411895115, "grad_norm": 0.08349609375, "learning_rate": 0.0014992675551361522, "loss": 1.2125, "step": 6572 }, { "epoch": 0.576398032963886, "grad_norm": 0.11669921875, "learning_rate": 0.001498862425906878, "loss": 1.2031, "step": 6573 }, { "epoch": 0.5764857247382607, "grad_norm": 0.08984375, "learning_rate": 0.00149845731046096, "loss": 1.2251, "step": 6574 }, { "epoch": 0.5765734165126352, "grad_norm": 0.08837890625, "learning_rate": 0.0014980522088353441, "loss": 1.2252, "step": 6575 }, { "epoch": 0.5766611082870097, "grad_norm": 0.099609375, "learning_rate": 0.0014976471210669745, "loss": 1.2518, "step": 6576 }, { "epoch": 0.5767488000613843, "grad_norm": 0.05126953125, "learning_rate": 0.0014972420471927946, "loss": 1.1963, "step": 6577 }, { "epoch": 0.5768364918357588, "grad_norm": 0.056396484375, "learning_rate": 0.0014968369872497451, "loss": 1.1525, "step": 6578 }, { "epoch": 0.5769241836101333, "grad_norm": 0.04833984375, "learning_rate": 0.0014964319412747672, "loss": 1.1319, "step": 6579 }, { "epoch": 0.5770118753845079, "grad_norm": 0.058349609375, "learning_rate": 0.0014960269093047996, "loss": 1.1579, "step": 6580 }, { "epoch": 0.5770995671588824, "grad_norm": 0.0546875, "learning_rate": 0.001495621891376781, "loss": 1.1602, "step": 6581 }, { "epoch": 0.577187258933257, "grad_norm": 0.060546875, "learning_rate": 0.0014952168875276466, "loss": 1.1919, "step": 6582 }, { "epoch": 0.5772749507076315, "grad_norm": 0.09228515625, "learning_rate": 0.0014948118977943334, "loss": 1.172, "step": 6583 }, { "epoch": 0.5773626424820061, "grad_norm": 0.054931640625, "learning_rate": 0.0014944069222137739, "loss": 1.1349, "step": 6584 }, { "epoch": 0.5774503342563806, "grad_norm": 0.051513671875, "learning_rate": 0.0014940019608229015, "loss": 1.128, "step": 6585 }, { "epoch": 0.5775380260307551, "grad_norm": 0.06005859375, "learning_rate": 0.0014935970136586475, "loss": 1.2041, "step": 6586 }, { "epoch": 0.5776257178051297, "grad_norm": 0.055908203125, "learning_rate": 0.001493192080757942, "loss": 1.1592, "step": 6587 }, { "epoch": 0.5777134095795042, "grad_norm": 0.05078125, "learning_rate": 0.0014927871621577143, "loss": 1.2089, "step": 6588 }, { "epoch": 0.5778011013538787, "grad_norm": 0.080078125, "learning_rate": 0.0014923822578948912, "loss": 1.2122, "step": 6589 }, { "epoch": 0.5778887931282534, "grad_norm": 0.05810546875, "learning_rate": 0.0014919773680063997, "loss": 1.18, "step": 6590 }, { "epoch": 0.5779764849026279, "grad_norm": 0.07666015625, "learning_rate": 0.0014915724925291638, "loss": 1.1905, "step": 6591 }, { "epoch": 0.5780641766770024, "grad_norm": 0.09912109375, "learning_rate": 0.001491167631500108, "loss": 1.2448, "step": 6592 }, { "epoch": 0.578151868451377, "grad_norm": 0.0576171875, "learning_rate": 0.0014907627849561538, "loss": 1.1933, "step": 6593 }, { "epoch": 0.5782395602257515, "grad_norm": 0.0654296875, "learning_rate": 0.001490357952934223, "loss": 1.1396, "step": 6594 }, { "epoch": 0.578327252000126, "grad_norm": 0.0751953125, "learning_rate": 0.0014899531354712345, "loss": 1.0979, "step": 6595 }, { "epoch": 0.5784149437745006, "grad_norm": 0.068359375, "learning_rate": 0.0014895483326041068, "loss": 1.1717, "step": 6596 }, { "epoch": 0.5785026355488752, "grad_norm": 0.06005859375, "learning_rate": 0.0014891435443697572, "loss": 1.1969, "step": 6597 }, { "epoch": 0.5785903273232497, "grad_norm": 0.050048828125, "learning_rate": 0.001488738770805101, "loss": 1.2517, "step": 6598 }, { "epoch": 0.5786780190976243, "grad_norm": 0.05810546875, "learning_rate": 0.0014883340119470525, "loss": 1.1937, "step": 6599 }, { "epoch": 0.5787657108719988, "grad_norm": 0.055419921875, "learning_rate": 0.0014879292678325255, "loss": 1.2299, "step": 6600 }, { "epoch": 0.5788534026463733, "grad_norm": 0.055908203125, "learning_rate": 0.0014875245384984304, "loss": 1.211, "step": 6601 }, { "epoch": 0.5789410944207479, "grad_norm": 0.051513671875, "learning_rate": 0.0014871198239816777, "loss": 1.181, "step": 6602 }, { "epoch": 0.5790287861951224, "grad_norm": 0.0615234375, "learning_rate": 0.0014867151243191765, "loss": 1.1766, "step": 6603 }, { "epoch": 0.579116477969497, "grad_norm": 0.07177734375, "learning_rate": 0.0014863104395478348, "loss": 1.2072, "step": 6604 }, { "epoch": 0.5792041697438715, "grad_norm": 0.05126953125, "learning_rate": 0.0014859057697045584, "loss": 1.2011, "step": 6605 }, { "epoch": 0.5792918615182461, "grad_norm": 0.059814453125, "learning_rate": 0.001485501114826252, "loss": 1.2027, "step": 6606 }, { "epoch": 0.5793795532926206, "grad_norm": 0.07958984375, "learning_rate": 0.0014850964749498193, "loss": 1.2041, "step": 6607 }, { "epoch": 0.5794672450669951, "grad_norm": 0.0546875, "learning_rate": 0.0014846918501121627, "loss": 1.1569, "step": 6608 }, { "epoch": 0.5795549368413697, "grad_norm": 0.0576171875, "learning_rate": 0.0014842872403501824, "loss": 1.1767, "step": 6609 }, { "epoch": 0.5796426286157442, "grad_norm": 0.10205078125, "learning_rate": 0.0014838826457007782, "loss": 1.222, "step": 6610 }, { "epoch": 0.5797303203901187, "grad_norm": 0.0556640625, "learning_rate": 0.0014834780662008472, "loss": 1.1914, "step": 6611 }, { "epoch": 0.5798180121644934, "grad_norm": 0.05029296875, "learning_rate": 0.0014830735018872875, "loss": 1.1883, "step": 6612 }, { "epoch": 0.5799057039388679, "grad_norm": 0.052734375, "learning_rate": 0.0014826689527969931, "loss": 1.229, "step": 6613 }, { "epoch": 0.5799933957132424, "grad_norm": 0.056640625, "learning_rate": 0.0014822644189668584, "loss": 1.2199, "step": 6614 }, { "epoch": 0.580081087487617, "grad_norm": 0.053466796875, "learning_rate": 0.0014818599004337756, "loss": 1.2336, "step": 6615 }, { "epoch": 0.5801687792619915, "grad_norm": 0.055419921875, "learning_rate": 0.001481455397234636, "loss": 1.1572, "step": 6616 }, { "epoch": 0.580256471036366, "grad_norm": 0.07275390625, "learning_rate": 0.0014810509094063291, "loss": 1.1899, "step": 6617 }, { "epoch": 0.5803441628107406, "grad_norm": 0.05078125, "learning_rate": 0.001480646436985743, "loss": 1.2196, "step": 6618 }, { "epoch": 0.5804318545851151, "grad_norm": 0.0703125, "learning_rate": 0.0014802419800097647, "loss": 1.1607, "step": 6619 }, { "epoch": 0.5805195463594897, "grad_norm": 0.057373046875, "learning_rate": 0.0014798375385152792, "loss": 1.1939, "step": 6620 }, { "epoch": 0.5806072381338643, "grad_norm": 0.11572265625, "learning_rate": 0.0014794331125391712, "loss": 1.192, "step": 6621 }, { "epoch": 0.5806949299082388, "grad_norm": 0.05419921875, "learning_rate": 0.001479028702118323, "loss": 1.1452, "step": 6622 }, { "epoch": 0.5807826216826133, "grad_norm": 0.06689453125, "learning_rate": 0.0014786243072896158, "loss": 1.2344, "step": 6623 }, { "epoch": 0.5808703134569879, "grad_norm": 0.0673828125, "learning_rate": 0.0014782199280899296, "loss": 1.1963, "step": 6624 }, { "epoch": 0.5809580052313624, "grad_norm": 0.04833984375, "learning_rate": 0.001477815564556142, "loss": 1.1919, "step": 6625 }, { "epoch": 0.5810456970057369, "grad_norm": 0.05029296875, "learning_rate": 0.001477411216725131, "loss": 1.2402, "step": 6626 }, { "epoch": 0.5811333887801116, "grad_norm": 0.08203125, "learning_rate": 0.001477006884633771, "loss": 1.1616, "step": 6627 }, { "epoch": 0.5812210805544861, "grad_norm": 0.050048828125, "learning_rate": 0.001476602568318937, "loss": 1.1599, "step": 6628 }, { "epoch": 0.5813087723288606, "grad_norm": 0.056884765625, "learning_rate": 0.0014761982678175008, "loss": 1.2005, "step": 6629 }, { "epoch": 0.5813964641032351, "grad_norm": 0.0576171875, "learning_rate": 0.0014757939831663341, "loss": 1.1429, "step": 6630 }, { "epoch": 0.5814841558776097, "grad_norm": 0.060302734375, "learning_rate": 0.001475389714402306, "loss": 1.1721, "step": 6631 }, { "epoch": 0.5815718476519842, "grad_norm": 0.048828125, "learning_rate": 0.0014749854615622858, "loss": 1.1465, "step": 6632 }, { "epoch": 0.5816595394263587, "grad_norm": 0.048095703125, "learning_rate": 0.0014745812246831397, "loss": 1.2053, "step": 6633 }, { "epoch": 0.5817472312007334, "grad_norm": 0.055419921875, "learning_rate": 0.0014741770038017334, "loss": 1.1912, "step": 6634 }, { "epoch": 0.5818349229751079, "grad_norm": 0.050537109375, "learning_rate": 0.0014737727989549295, "loss": 1.1001, "step": 6635 }, { "epoch": 0.5819226147494824, "grad_norm": 0.05224609375, "learning_rate": 0.001473368610179592, "loss": 1.1984, "step": 6636 }, { "epoch": 0.582010306523857, "grad_norm": 0.05029296875, "learning_rate": 0.001472964437512581, "loss": 1.1775, "step": 6637 }, { "epoch": 0.5820979982982315, "grad_norm": 0.054931640625, "learning_rate": 0.0014725602809907562, "loss": 1.1676, "step": 6638 }, { "epoch": 0.582185690072606, "grad_norm": 0.052001953125, "learning_rate": 0.0014721561406509762, "loss": 1.1737, "step": 6639 }, { "epoch": 0.5822733818469806, "grad_norm": 0.050048828125, "learning_rate": 0.0014717520165300967, "loss": 1.1976, "step": 6640 }, { "epoch": 0.5823610736213551, "grad_norm": 0.05126953125, "learning_rate": 0.0014713479086649734, "loss": 1.1912, "step": 6641 }, { "epoch": 0.5824487653957297, "grad_norm": 0.052978515625, "learning_rate": 0.001470943817092459, "loss": 1.1917, "step": 6642 }, { "epoch": 0.5825364571701043, "grad_norm": 0.044189453125, "learning_rate": 0.001470539741849407, "loss": 1.1727, "step": 6643 }, { "epoch": 0.5826241489444788, "grad_norm": 0.054443359375, "learning_rate": 0.0014701356829726667, "loss": 1.1805, "step": 6644 }, { "epoch": 0.5827118407188533, "grad_norm": 0.05712890625, "learning_rate": 0.0014697316404990882, "loss": 1.2088, "step": 6645 }, { "epoch": 0.5827995324932279, "grad_norm": 0.043701171875, "learning_rate": 0.0014693276144655187, "loss": 1.1863, "step": 6646 }, { "epoch": 0.5828872242676024, "grad_norm": 0.049560546875, "learning_rate": 0.001468923604908804, "loss": 1.2167, "step": 6647 }, { "epoch": 0.5829749160419769, "grad_norm": 0.049560546875, "learning_rate": 0.001468519611865789, "loss": 1.2108, "step": 6648 }, { "epoch": 0.5830626078163516, "grad_norm": 0.057373046875, "learning_rate": 0.0014681156353733176, "loss": 1.1905, "step": 6649 }, { "epoch": 0.5831502995907261, "grad_norm": 0.059326171875, "learning_rate": 0.0014677116754682302, "loss": 1.1738, "step": 6650 }, { "epoch": 0.5832379913651006, "grad_norm": 0.0712890625, "learning_rate": 0.001467307732187368, "loss": 1.1847, "step": 6651 }, { "epoch": 0.5833256831394751, "grad_norm": 0.052978515625, "learning_rate": 0.0014669038055675687, "loss": 1.2059, "step": 6652 }, { "epoch": 0.5834133749138497, "grad_norm": 0.050048828125, "learning_rate": 0.0014664998956456696, "loss": 1.1896, "step": 6653 }, { "epoch": 0.5835010666882242, "grad_norm": 0.05712890625, "learning_rate": 0.001466096002458506, "loss": 1.2066, "step": 6654 }, { "epoch": 0.5835887584625987, "grad_norm": 0.0556640625, "learning_rate": 0.0014656921260429129, "loss": 1.1477, "step": 6655 }, { "epoch": 0.5836764502369733, "grad_norm": 0.09619140625, "learning_rate": 0.0014652882664357217, "loss": 1.2276, "step": 6656 }, { "epoch": 0.5837641420113479, "grad_norm": 0.060546875, "learning_rate": 0.0014648844236737637, "loss": 1.1776, "step": 6657 }, { "epoch": 0.5838518337857224, "grad_norm": 0.10400390625, "learning_rate": 0.0014644805977938682, "loss": 1.1729, "step": 6658 }, { "epoch": 0.583939525560097, "grad_norm": 0.06005859375, "learning_rate": 0.0014640767888328637, "loss": 1.2092, "step": 6659 }, { "epoch": 0.5840272173344715, "grad_norm": 0.057373046875, "learning_rate": 0.0014636729968275758, "loss": 1.1754, "step": 6660 }, { "epoch": 0.584114909108846, "grad_norm": 0.0859375, "learning_rate": 0.0014632692218148299, "loss": 1.2176, "step": 6661 }, { "epoch": 0.5842026008832206, "grad_norm": 0.05810546875, "learning_rate": 0.0014628654638314486, "loss": 1.1818, "step": 6662 }, { "epoch": 0.5842902926575951, "grad_norm": 0.087890625, "learning_rate": 0.0014624617229142543, "loss": 1.2092, "step": 6663 }, { "epoch": 0.5843779844319696, "grad_norm": 0.06787109375, "learning_rate": 0.0014620579991000665, "loss": 1.1778, "step": 6664 }, { "epoch": 0.5844656762063443, "grad_norm": 0.0947265625, "learning_rate": 0.001461654292425704, "loss": 1.1837, "step": 6665 }, { "epoch": 0.5845533679807188, "grad_norm": 0.061279296875, "learning_rate": 0.0014612506029279839, "loss": 1.2282, "step": 6666 }, { "epoch": 0.5846410597550933, "grad_norm": 0.052734375, "learning_rate": 0.0014608469306437216, "loss": 1.2579, "step": 6667 }, { "epoch": 0.5847287515294679, "grad_norm": 0.051513671875, "learning_rate": 0.001460443275609731, "loss": 1.1138, "step": 6668 }, { "epoch": 0.5848164433038424, "grad_norm": 0.057373046875, "learning_rate": 0.0014600396378628242, "loss": 1.1599, "step": 6669 }, { "epoch": 0.5849041350782169, "grad_norm": 0.052001953125, "learning_rate": 0.001459636017439812, "loss": 1.1426, "step": 6670 }, { "epoch": 0.5849918268525915, "grad_norm": 0.049072265625, "learning_rate": 0.0014592324143775036, "loss": 1.2251, "step": 6671 }, { "epoch": 0.585079518626966, "grad_norm": 0.051025390625, "learning_rate": 0.0014588288287127067, "loss": 1.1789, "step": 6672 }, { "epoch": 0.5851672104013406, "grad_norm": 0.0810546875, "learning_rate": 0.0014584252604822267, "loss": 1.1798, "step": 6673 }, { "epoch": 0.5852549021757152, "grad_norm": 0.048828125, "learning_rate": 0.0014580217097228687, "loss": 1.1792, "step": 6674 }, { "epoch": 0.5853425939500897, "grad_norm": 0.091796875, "learning_rate": 0.001457618176471435, "loss": 1.162, "step": 6675 }, { "epoch": 0.5854302857244642, "grad_norm": 0.10498046875, "learning_rate": 0.0014572146607647269, "loss": 1.2339, "step": 6676 }, { "epoch": 0.5855179774988387, "grad_norm": 0.0546875, "learning_rate": 0.0014568111626395448, "loss": 1.1491, "step": 6677 }, { "epoch": 0.5856056692732133, "grad_norm": 0.11865234375, "learning_rate": 0.0014564076821326852, "loss": 1.1898, "step": 6678 }, { "epoch": 0.5856933610475878, "grad_norm": 0.10302734375, "learning_rate": 0.0014560042192809455, "loss": 1.1325, "step": 6679 }, { "epoch": 0.5857810528219624, "grad_norm": 0.07373046875, "learning_rate": 0.00145560077412112, "loss": 1.2069, "step": 6680 }, { "epoch": 0.585868744596337, "grad_norm": 0.10205078125, "learning_rate": 0.0014551973466900023, "loss": 1.1937, "step": 6681 }, { "epoch": 0.5859564363707115, "grad_norm": 0.05810546875, "learning_rate": 0.0014547939370243834, "loss": 1.1865, "step": 6682 }, { "epoch": 0.586044128145086, "grad_norm": 0.08544921875, "learning_rate": 0.001454390545161054, "loss": 1.2249, "step": 6683 }, { "epoch": 0.5861318199194606, "grad_norm": 0.08837890625, "learning_rate": 0.0014539871711368017, "loss": 1.1931, "step": 6684 }, { "epoch": 0.5862195116938351, "grad_norm": 0.064453125, "learning_rate": 0.0014535838149884136, "loss": 1.2405, "step": 6685 }, { "epoch": 0.5863072034682096, "grad_norm": 0.06201171875, "learning_rate": 0.0014531804767526748, "loss": 1.1648, "step": 6686 }, { "epoch": 0.5863948952425843, "grad_norm": 0.07763671875, "learning_rate": 0.0014527771564663676, "loss": 1.2357, "step": 6687 }, { "epoch": 0.5864825870169588, "grad_norm": 0.08447265625, "learning_rate": 0.001452373854166275, "loss": 1.1622, "step": 6688 }, { "epoch": 0.5865702787913333, "grad_norm": 0.0615234375, "learning_rate": 0.0014519705698891763, "loss": 1.177, "step": 6689 }, { "epoch": 0.5866579705657079, "grad_norm": 0.057861328125, "learning_rate": 0.001451567303671851, "loss": 1.1982, "step": 6690 }, { "epoch": 0.5867456623400824, "grad_norm": 0.06201171875, "learning_rate": 0.001451164055551075, "loss": 1.203, "step": 6691 }, { "epoch": 0.5868333541144569, "grad_norm": 0.09619140625, "learning_rate": 0.001450760825563624, "loss": 1.1668, "step": 6692 }, { "epoch": 0.5869210458888315, "grad_norm": 0.064453125, "learning_rate": 0.0014503576137462712, "loss": 1.2058, "step": 6693 }, { "epoch": 0.587008737663206, "grad_norm": 0.06689453125, "learning_rate": 0.0014499544201357888, "loss": 1.2154, "step": 6694 }, { "epoch": 0.5870964294375806, "grad_norm": 0.0947265625, "learning_rate": 0.0014495512447689467, "loss": 1.2225, "step": 6695 }, { "epoch": 0.5871841212119552, "grad_norm": 0.08251953125, "learning_rate": 0.001449148087682514, "loss": 1.2029, "step": 6696 }, { "epoch": 0.5872718129863297, "grad_norm": 0.068359375, "learning_rate": 0.0014487449489132569, "loss": 1.1627, "step": 6697 }, { "epoch": 0.5873595047607042, "grad_norm": 0.061767578125, "learning_rate": 0.001448341828497941, "loss": 1.1785, "step": 6698 }, { "epoch": 0.5874471965350788, "grad_norm": 0.06689453125, "learning_rate": 0.00144793872647333, "loss": 1.2131, "step": 6699 }, { "epoch": 0.5875348883094533, "grad_norm": 0.057373046875, "learning_rate": 0.001447535642876185, "loss": 1.2071, "step": 6700 }, { "epoch": 0.5876225800838278, "grad_norm": 0.0751953125, "learning_rate": 0.0014471325777432671, "loss": 1.1728, "step": 6701 }, { "epoch": 0.5877102718582023, "grad_norm": 0.050537109375, "learning_rate": 0.001446729531111335, "loss": 1.1454, "step": 6702 }, { "epoch": 0.587797963632577, "grad_norm": 0.05224609375, "learning_rate": 0.001446326503017144, "loss": 1.1991, "step": 6703 }, { "epoch": 0.5878856554069515, "grad_norm": 0.061767578125, "learning_rate": 0.0014459234934974504, "loss": 1.1952, "step": 6704 }, { "epoch": 0.587973347181326, "grad_norm": 0.06689453125, "learning_rate": 0.0014455205025890071, "loss": 1.1826, "step": 6705 }, { "epoch": 0.5880610389557006, "grad_norm": 0.061767578125, "learning_rate": 0.0014451175303285665, "loss": 1.1696, "step": 6706 }, { "epoch": 0.5881487307300751, "grad_norm": 0.0556640625, "learning_rate": 0.0014447145767528778, "loss": 1.1766, "step": 6707 }, { "epoch": 0.5882364225044496, "grad_norm": 0.06982421875, "learning_rate": 0.0014443116418986902, "loss": 1.1985, "step": 6708 }, { "epoch": 0.5883241142788243, "grad_norm": 0.076171875, "learning_rate": 0.00144390872580275, "loss": 1.2786, "step": 6709 }, { "epoch": 0.5884118060531988, "grad_norm": 0.08837890625, "learning_rate": 0.0014435058285018018, "loss": 1.1861, "step": 6710 }, { "epoch": 0.5884994978275733, "grad_norm": 0.06396484375, "learning_rate": 0.0014431029500325885, "loss": 1.1622, "step": 6711 }, { "epoch": 0.5885871896019479, "grad_norm": 0.142578125, "learning_rate": 0.0014427000904318526, "loss": 1.1825, "step": 6712 }, { "epoch": 0.5886748813763224, "grad_norm": 0.052978515625, "learning_rate": 0.0014422972497363333, "loss": 1.1528, "step": 6713 }, { "epoch": 0.5887625731506969, "grad_norm": 0.08837890625, "learning_rate": 0.0014418944279827686, "loss": 1.1452, "step": 6714 }, { "epoch": 0.5888502649250715, "grad_norm": 0.099609375, "learning_rate": 0.0014414916252078944, "loss": 1.1061, "step": 6715 }, { "epoch": 0.588937956699446, "grad_norm": 0.0537109375, "learning_rate": 0.0014410888414484464, "loss": 1.1581, "step": 6716 }, { "epoch": 0.5890256484738206, "grad_norm": 0.12890625, "learning_rate": 0.001440686076741156, "loss": 1.2451, "step": 6717 }, { "epoch": 0.5891133402481952, "grad_norm": 0.07373046875, "learning_rate": 0.0014402833311227558, "loss": 1.2009, "step": 6718 }, { "epoch": 0.5892010320225697, "grad_norm": 0.051513671875, "learning_rate": 0.0014398806046299747, "loss": 1.2018, "step": 6719 }, { "epoch": 0.5892887237969442, "grad_norm": 0.111328125, "learning_rate": 0.0014394778972995391, "loss": 1.1791, "step": 6720 }, { "epoch": 0.5893764155713188, "grad_norm": 0.053955078125, "learning_rate": 0.0014390752091681764, "loss": 1.1578, "step": 6721 }, { "epoch": 0.5894641073456933, "grad_norm": 0.053466796875, "learning_rate": 0.0014386725402726094, "loss": 1.1215, "step": 6722 }, { "epoch": 0.5895517991200678, "grad_norm": 0.08251953125, "learning_rate": 0.001438269890649562, "loss": 1.2259, "step": 6723 }, { "epoch": 0.5896394908944423, "grad_norm": 0.06396484375, "learning_rate": 0.0014378672603357536, "loss": 1.1691, "step": 6724 }, { "epoch": 0.589727182668817, "grad_norm": 0.05029296875, "learning_rate": 0.0014374646493679037, "loss": 1.1841, "step": 6725 }, { "epoch": 0.5898148744431915, "grad_norm": 0.06982421875, "learning_rate": 0.0014370620577827285, "loss": 1.1364, "step": 6726 }, { "epoch": 0.589902566217566, "grad_norm": 0.06103515625, "learning_rate": 0.0014366594856169445, "loss": 1.2063, "step": 6727 }, { "epoch": 0.5899902579919406, "grad_norm": 0.051513671875, "learning_rate": 0.0014362569329072646, "loss": 1.1785, "step": 6728 }, { "epoch": 0.5900779497663151, "grad_norm": 0.06103515625, "learning_rate": 0.0014358543996904004, "loss": 1.1728, "step": 6729 }, { "epoch": 0.5901656415406896, "grad_norm": 0.049560546875, "learning_rate": 0.0014354518860030623, "loss": 1.1659, "step": 6730 }, { "epoch": 0.5902533333150642, "grad_norm": 0.050048828125, "learning_rate": 0.0014350493918819582, "loss": 1.1712, "step": 6731 }, { "epoch": 0.5903410250894388, "grad_norm": 0.058837890625, "learning_rate": 0.0014346469173637949, "loss": 1.2208, "step": 6732 }, { "epoch": 0.5904287168638133, "grad_norm": 0.1064453125, "learning_rate": 0.0014342444624852767, "loss": 1.1937, "step": 6733 }, { "epoch": 0.5905164086381879, "grad_norm": 0.078125, "learning_rate": 0.0014338420272831066, "loss": 1.2056, "step": 6734 }, { "epoch": 0.5906041004125624, "grad_norm": 0.06494140625, "learning_rate": 0.0014334396117939856, "loss": 1.1854, "step": 6735 }, { "epoch": 0.5906917921869369, "grad_norm": 0.08740234375, "learning_rate": 0.0014330372160546135, "loss": 1.1519, "step": 6736 }, { "epoch": 0.5907794839613115, "grad_norm": 0.0771484375, "learning_rate": 0.0014326348401016867, "loss": 1.1939, "step": 6737 }, { "epoch": 0.590867175735686, "grad_norm": 0.08740234375, "learning_rate": 0.0014322324839719008, "loss": 1.1577, "step": 6738 }, { "epoch": 0.5909548675100605, "grad_norm": 0.12451171875, "learning_rate": 0.0014318301477019512, "loss": 1.2011, "step": 6739 }, { "epoch": 0.5910425592844352, "grad_norm": 0.05029296875, "learning_rate": 0.001431427831328528, "loss": 1.1385, "step": 6740 }, { "epoch": 0.5911302510588097, "grad_norm": 0.06201171875, "learning_rate": 0.0014310255348883231, "loss": 1.1872, "step": 6741 }, { "epoch": 0.5912179428331842, "grad_norm": 0.08447265625, "learning_rate": 0.0014306232584180235, "loss": 1.2181, "step": 6742 }, { "epoch": 0.5913056346075588, "grad_norm": 0.06298828125, "learning_rate": 0.001430221001954317, "loss": 1.1963, "step": 6743 }, { "epoch": 0.5913933263819333, "grad_norm": 0.059814453125, "learning_rate": 0.0014298187655338872, "loss": 1.2064, "step": 6744 }, { "epoch": 0.5914810181563078, "grad_norm": 0.057373046875, "learning_rate": 0.0014294165491934181, "loss": 1.2068, "step": 6745 }, { "epoch": 0.5915687099306824, "grad_norm": 0.05224609375, "learning_rate": 0.0014290143529695901, "loss": 1.1779, "step": 6746 }, { "epoch": 0.591656401705057, "grad_norm": 0.059326171875, "learning_rate": 0.0014286121768990827, "loss": 1.1938, "step": 6747 }, { "epoch": 0.5917440934794315, "grad_norm": 0.07275390625, "learning_rate": 0.0014282100210185728, "loss": 1.1791, "step": 6748 }, { "epoch": 0.591831785253806, "grad_norm": 0.0576171875, "learning_rate": 0.0014278078853647372, "loss": 1.2494, "step": 6749 }, { "epoch": 0.5919194770281806, "grad_norm": 0.07373046875, "learning_rate": 0.001427405769974248, "loss": 1.2027, "step": 6750 }, { "epoch": 0.5920071688025551, "grad_norm": 0.052490234375, "learning_rate": 0.0014270036748837787, "loss": 1.1884, "step": 6751 }, { "epoch": 0.5920948605769296, "grad_norm": 0.05517578125, "learning_rate": 0.001426601600129998, "loss": 1.1751, "step": 6752 }, { "epoch": 0.5921825523513042, "grad_norm": 0.0615234375, "learning_rate": 0.0014261995457495755, "loss": 1.1779, "step": 6753 }, { "epoch": 0.5922702441256787, "grad_norm": 0.0673828125, "learning_rate": 0.0014257975117791762, "loss": 1.1903, "step": 6754 }, { "epoch": 0.5923579359000533, "grad_norm": 0.060546875, "learning_rate": 0.0014253954982554652, "loss": 1.2468, "step": 6755 }, { "epoch": 0.5924456276744279, "grad_norm": 0.0478515625, "learning_rate": 0.0014249935052151044, "loss": 1.1728, "step": 6756 }, { "epoch": 0.5925333194488024, "grad_norm": 0.0576171875, "learning_rate": 0.0014245915326947553, "loss": 1.1838, "step": 6757 }, { "epoch": 0.5926210112231769, "grad_norm": 0.049560546875, "learning_rate": 0.0014241895807310767, "loss": 1.1975, "step": 6758 }, { "epoch": 0.5927087029975515, "grad_norm": 0.083984375, "learning_rate": 0.0014237876493607255, "loss": 1.1836, "step": 6759 }, { "epoch": 0.592796394771926, "grad_norm": 0.052734375, "learning_rate": 0.0014233857386203562, "loss": 1.1568, "step": 6760 }, { "epoch": 0.5928840865463005, "grad_norm": 0.050048828125, "learning_rate": 0.001422983848546623, "loss": 1.2081, "step": 6761 }, { "epoch": 0.5929717783206752, "grad_norm": 0.052978515625, "learning_rate": 0.0014225819791761768, "loss": 1.2366, "step": 6762 }, { "epoch": 0.5930594700950497, "grad_norm": 0.1005859375, "learning_rate": 0.001422180130545667, "loss": 1.1888, "step": 6763 }, { "epoch": 0.5931471618694242, "grad_norm": 0.057861328125, "learning_rate": 0.0014217783026917409, "loss": 1.1504, "step": 6764 }, { "epoch": 0.5932348536437988, "grad_norm": 0.05126953125, "learning_rate": 0.001421376495651045, "loss": 1.2056, "step": 6765 }, { "epoch": 0.5933225454181733, "grad_norm": 0.0654296875, "learning_rate": 0.0014209747094602217, "loss": 1.147, "step": 6766 }, { "epoch": 0.5934102371925478, "grad_norm": 0.060302734375, "learning_rate": 0.0014205729441559144, "loss": 1.2127, "step": 6767 }, { "epoch": 0.5934979289669224, "grad_norm": 0.058349609375, "learning_rate": 0.0014201711997747619, "loss": 1.1972, "step": 6768 }, { "epoch": 0.593585620741297, "grad_norm": 0.06982421875, "learning_rate": 0.0014197694763534029, "loss": 1.2291, "step": 6769 }, { "epoch": 0.5936733125156715, "grad_norm": 0.07373046875, "learning_rate": 0.0014193677739284735, "loss": 1.2087, "step": 6770 }, { "epoch": 0.5937610042900461, "grad_norm": 0.0458984375, "learning_rate": 0.0014189660925366075, "loss": 1.161, "step": 6771 }, { "epoch": 0.5938486960644206, "grad_norm": 0.078125, "learning_rate": 0.0014185644322144376, "loss": 1.2312, "step": 6772 }, { "epoch": 0.5939363878387951, "grad_norm": 0.0791015625, "learning_rate": 0.0014181627929985936, "loss": 1.1425, "step": 6773 }, { "epoch": 0.5940240796131696, "grad_norm": 0.05615234375, "learning_rate": 0.0014177611749257048, "loss": 1.1488, "step": 6774 }, { "epoch": 0.5941117713875442, "grad_norm": 0.060791015625, "learning_rate": 0.001417359578032397, "loss": 1.2077, "step": 6775 }, { "epoch": 0.5941994631619187, "grad_norm": 0.0634765625, "learning_rate": 0.0014169580023552955, "loss": 1.188, "step": 6776 }, { "epoch": 0.5942871549362932, "grad_norm": 0.09619140625, "learning_rate": 0.0014165564479310224, "loss": 1.149, "step": 6777 }, { "epoch": 0.5943748467106679, "grad_norm": 0.06396484375, "learning_rate": 0.0014161549147961986, "loss": 1.2054, "step": 6778 }, { "epoch": 0.5944625384850424, "grad_norm": 0.07421875, "learning_rate": 0.0014157534029874427, "loss": 1.2211, "step": 6779 }, { "epoch": 0.5945502302594169, "grad_norm": 0.10693359375, "learning_rate": 0.0014153519125413723, "loss": 1.2664, "step": 6780 }, { "epoch": 0.5946379220337915, "grad_norm": 0.055419921875, "learning_rate": 0.0014149504434946012, "loss": 1.1524, "step": 6781 }, { "epoch": 0.594725613808166, "grad_norm": 0.11865234375, "learning_rate": 0.001414548995883743, "loss": 1.1651, "step": 6782 }, { "epoch": 0.5948133055825405, "grad_norm": 0.10791015625, "learning_rate": 0.0014141475697454086, "loss": 1.1674, "step": 6783 }, { "epoch": 0.5949009973569152, "grad_norm": 0.0625, "learning_rate": 0.001413746165116207, "loss": 1.1988, "step": 6784 }, { "epoch": 0.5949886891312897, "grad_norm": 0.13671875, "learning_rate": 0.0014133447820327454, "loss": 1.2604, "step": 6785 }, { "epoch": 0.5950763809056642, "grad_norm": 0.0654296875, "learning_rate": 0.0014129434205316285, "loss": 1.1665, "step": 6786 }, { "epoch": 0.5951640726800388, "grad_norm": 0.057861328125, "learning_rate": 0.0014125420806494603, "loss": 1.1599, "step": 6787 }, { "epoch": 0.5952517644544133, "grad_norm": 0.0634765625, "learning_rate": 0.0014121407624228409, "loss": 1.1055, "step": 6788 }, { "epoch": 0.5953394562287878, "grad_norm": 0.058349609375, "learning_rate": 0.0014117394658883697, "loss": 1.1954, "step": 6789 }, { "epoch": 0.5954271480031624, "grad_norm": 0.0712890625, "learning_rate": 0.0014113381910826445, "loss": 1.2299, "step": 6790 }, { "epoch": 0.5955148397775369, "grad_norm": 0.0634765625, "learning_rate": 0.0014109369380422602, "loss": 1.2035, "step": 6791 }, { "epoch": 0.5956025315519115, "grad_norm": 0.051025390625, "learning_rate": 0.0014105357068038096, "loss": 1.1464, "step": 6792 }, { "epoch": 0.5956902233262861, "grad_norm": 0.0498046875, "learning_rate": 0.0014101344974038847, "loss": 1.197, "step": 6793 }, { "epoch": 0.5957779151006606, "grad_norm": 0.056640625, "learning_rate": 0.0014097333098790743, "loss": 1.1989, "step": 6794 }, { "epoch": 0.5958656068750351, "grad_norm": 0.0556640625, "learning_rate": 0.0014093321442659656, "loss": 1.1357, "step": 6795 }, { "epoch": 0.5959532986494096, "grad_norm": 0.0791015625, "learning_rate": 0.0014089310006011446, "loss": 1.1941, "step": 6796 }, { "epoch": 0.5960409904237842, "grad_norm": 0.052978515625, "learning_rate": 0.0014085298789211936, "loss": 1.2076, "step": 6797 }, { "epoch": 0.5961286821981587, "grad_norm": 0.068359375, "learning_rate": 0.0014081287792626945, "loss": 1.1803, "step": 6798 }, { "epoch": 0.5962163739725332, "grad_norm": 0.0615234375, "learning_rate": 0.001407727701662226, "loss": 1.2515, "step": 6799 }, { "epoch": 0.5963040657469079, "grad_norm": 0.051513671875, "learning_rate": 0.0014073266461563662, "loss": 1.1946, "step": 6800 }, { "epoch": 0.5963917575212824, "grad_norm": 0.052001953125, "learning_rate": 0.0014069256127816896, "loss": 1.1859, "step": 6801 }, { "epoch": 0.5964794492956569, "grad_norm": 0.052001953125, "learning_rate": 0.0014065246015747697, "loss": 1.1768, "step": 6802 }, { "epoch": 0.5965671410700315, "grad_norm": 0.0615234375, "learning_rate": 0.0014061236125721777, "loss": 1.1985, "step": 6803 }, { "epoch": 0.596654832844406, "grad_norm": 0.050537109375, "learning_rate": 0.0014057226458104822, "loss": 1.1202, "step": 6804 }, { "epoch": 0.5967425246187805, "grad_norm": 0.06787109375, "learning_rate": 0.0014053217013262514, "loss": 1.2048, "step": 6805 }, { "epoch": 0.5968302163931551, "grad_norm": 0.07666015625, "learning_rate": 0.0014049207791560494, "loss": 1.1581, "step": 6806 }, { "epoch": 0.5969179081675297, "grad_norm": 0.061767578125, "learning_rate": 0.00140451987933644, "loss": 1.2083, "step": 6807 }, { "epoch": 0.5970055999419042, "grad_norm": 0.06201171875, "learning_rate": 0.0014041190019039832, "loss": 1.1897, "step": 6808 }, { "epoch": 0.5970932917162788, "grad_norm": 0.0927734375, "learning_rate": 0.0014037181468952394, "loss": 1.1692, "step": 6809 }, { "epoch": 0.5971809834906533, "grad_norm": 0.048828125, "learning_rate": 0.0014033173143467644, "loss": 1.1866, "step": 6810 }, { "epoch": 0.5972686752650278, "grad_norm": 0.109375, "learning_rate": 0.0014029165042951134, "loss": 1.1779, "step": 6811 }, { "epoch": 0.5973563670394024, "grad_norm": 0.09228515625, "learning_rate": 0.0014025157167768397, "loss": 1.1219, "step": 6812 }, { "epoch": 0.5974440588137769, "grad_norm": 0.05029296875, "learning_rate": 0.0014021149518284935, "loss": 1.1816, "step": 6813 }, { "epoch": 0.5975317505881514, "grad_norm": 0.083984375, "learning_rate": 0.001401714209486624, "loss": 1.1906, "step": 6814 }, { "epoch": 0.5976194423625261, "grad_norm": 0.06591796875, "learning_rate": 0.0014013134897877778, "loss": 1.2229, "step": 6815 }, { "epoch": 0.5977071341369006, "grad_norm": 0.0458984375, "learning_rate": 0.001400912792768499, "loss": 1.1736, "step": 6816 }, { "epoch": 0.5977948259112751, "grad_norm": 0.09228515625, "learning_rate": 0.0014005121184653309, "loss": 1.2107, "step": 6817 }, { "epoch": 0.5978825176856497, "grad_norm": 0.0869140625, "learning_rate": 0.0014001114669148131, "loss": 1.2732, "step": 6818 }, { "epoch": 0.5979702094600242, "grad_norm": 0.08740234375, "learning_rate": 0.0013997108381534845, "loss": 1.2155, "step": 6819 }, { "epoch": 0.5980579012343987, "grad_norm": 0.10400390625, "learning_rate": 0.001399310232217882, "loss": 1.2146, "step": 6820 }, { "epoch": 0.5981455930087732, "grad_norm": 0.0712890625, "learning_rate": 0.001398909649144539, "loss": 1.1701, "step": 6821 }, { "epoch": 0.5982332847831479, "grad_norm": 0.060302734375, "learning_rate": 0.0013985090889699872, "loss": 1.1972, "step": 6822 }, { "epoch": 0.5983209765575224, "grad_norm": 0.12451171875, "learning_rate": 0.0013981085517307578, "loss": 1.1916, "step": 6823 }, { "epoch": 0.5984086683318969, "grad_norm": 0.0556640625, "learning_rate": 0.0013977080374633776, "loss": 1.1636, "step": 6824 }, { "epoch": 0.5984963601062715, "grad_norm": 0.0859375, "learning_rate": 0.0013973075462043738, "loss": 1.2452, "step": 6825 }, { "epoch": 0.598584051880646, "grad_norm": 0.0859375, "learning_rate": 0.0013969070779902687, "loss": 1.1428, "step": 6826 }, { "epoch": 0.5986717436550205, "grad_norm": 0.0478515625, "learning_rate": 0.0013965066328575852, "loss": 1.165, "step": 6827 }, { "epoch": 0.5987594354293951, "grad_norm": 0.0703125, "learning_rate": 0.0013961062108428421, "loss": 1.2005, "step": 6828 }, { "epoch": 0.5988471272037696, "grad_norm": 0.055419921875, "learning_rate": 0.0013957058119825577, "loss": 1.0807, "step": 6829 }, { "epoch": 0.5989348189781442, "grad_norm": 0.051513671875, "learning_rate": 0.0013953054363132465, "loss": 1.1201, "step": 6830 }, { "epoch": 0.5990225107525188, "grad_norm": 0.06591796875, "learning_rate": 0.001394905083871422, "loss": 1.1901, "step": 6831 }, { "epoch": 0.5991102025268933, "grad_norm": 0.048583984375, "learning_rate": 0.0013945047546935951, "loss": 1.2216, "step": 6832 }, { "epoch": 0.5991978943012678, "grad_norm": 0.052978515625, "learning_rate": 0.0013941044488162753, "loss": 1.154, "step": 6833 }, { "epoch": 0.5992855860756424, "grad_norm": 0.053955078125, "learning_rate": 0.001393704166275969, "loss": 1.2071, "step": 6834 }, { "epoch": 0.5993732778500169, "grad_norm": 0.0771484375, "learning_rate": 0.001393303907109181, "loss": 1.2169, "step": 6835 }, { "epoch": 0.5994609696243914, "grad_norm": 0.0546875, "learning_rate": 0.0013929036713524147, "loss": 1.216, "step": 6836 }, { "epoch": 0.5995486613987661, "grad_norm": 0.048828125, "learning_rate": 0.0013925034590421695, "loss": 1.1711, "step": 6837 }, { "epoch": 0.5996363531731406, "grad_norm": 0.05419921875, "learning_rate": 0.0013921032702149444, "loss": 1.2566, "step": 6838 }, { "epoch": 0.5997240449475151, "grad_norm": 0.0673828125, "learning_rate": 0.0013917031049072346, "loss": 1.1995, "step": 6839 }, { "epoch": 0.5998117367218897, "grad_norm": 0.054443359375, "learning_rate": 0.0013913029631555352, "loss": 1.272, "step": 6840 }, { "epoch": 0.5998994284962642, "grad_norm": 0.05615234375, "learning_rate": 0.001390902844996338, "loss": 1.1309, "step": 6841 }, { "epoch": 0.5999871202706387, "grad_norm": 0.060546875, "learning_rate": 0.0013905027504661316, "loss": 1.1587, "step": 6842 }, { "epoch": 0.6000748120450133, "grad_norm": 0.0693359375, "learning_rate": 0.0013901026796014051, "loss": 1.1809, "step": 6843 }, { "epoch": 0.6001625038193878, "grad_norm": 0.05517578125, "learning_rate": 0.0013897026324386436, "loss": 1.131, "step": 6844 }, { "epoch": 0.6002501955937624, "grad_norm": 0.06591796875, "learning_rate": 0.0013893026090143299, "loss": 1.3103, "step": 6845 }, { "epoch": 0.6003378873681369, "grad_norm": 0.052734375, "learning_rate": 0.0013889026093649448, "loss": 1.2151, "step": 6846 }, { "epoch": 0.6004255791425115, "grad_norm": 0.0556640625, "learning_rate": 0.0013885026335269679, "loss": 1.1716, "step": 6847 }, { "epoch": 0.600513270916886, "grad_norm": 0.06396484375, "learning_rate": 0.001388102681536876, "loss": 1.1769, "step": 6848 }, { "epoch": 0.6006009626912605, "grad_norm": 0.048095703125, "learning_rate": 0.0013877027534311434, "loss": 1.2155, "step": 6849 }, { "epoch": 0.6006886544656351, "grad_norm": 0.07763671875, "learning_rate": 0.0013873028492462423, "loss": 1.2106, "step": 6850 }, { "epoch": 0.6007763462400096, "grad_norm": 0.060302734375, "learning_rate": 0.0013869029690186435, "loss": 1.2018, "step": 6851 }, { "epoch": 0.6008640380143841, "grad_norm": 0.0595703125, "learning_rate": 0.0013865031127848143, "loss": 1.1013, "step": 6852 }, { "epoch": 0.6009517297887588, "grad_norm": 0.10400390625, "learning_rate": 0.0013861032805812215, "loss": 1.1938, "step": 6853 }, { "epoch": 0.6010394215631333, "grad_norm": 0.055419921875, "learning_rate": 0.0013857034724443287, "loss": 1.1632, "step": 6854 }, { "epoch": 0.6011271133375078, "grad_norm": 0.07177734375, "learning_rate": 0.0013853036884105964, "loss": 1.1609, "step": 6855 }, { "epoch": 0.6012148051118824, "grad_norm": 0.0517578125, "learning_rate": 0.0013849039285164844, "loss": 1.1741, "step": 6856 }, { "epoch": 0.6013024968862569, "grad_norm": 0.0625, "learning_rate": 0.0013845041927984495, "loss": 1.2541, "step": 6857 }, { "epoch": 0.6013901886606314, "grad_norm": 0.054931640625, "learning_rate": 0.0013841044812929473, "loss": 1.1671, "step": 6858 }, { "epoch": 0.601477880435006, "grad_norm": 0.04833984375, "learning_rate": 0.0013837047940364296, "loss": 1.183, "step": 6859 }, { "epoch": 0.6015655722093806, "grad_norm": 0.0556640625, "learning_rate": 0.0013833051310653474, "loss": 1.1803, "step": 6860 }, { "epoch": 0.6016532639837551, "grad_norm": 0.060302734375, "learning_rate": 0.0013829054924161485, "loss": 1.189, "step": 6861 }, { "epoch": 0.6017409557581297, "grad_norm": 0.048828125, "learning_rate": 0.0013825058781252798, "loss": 1.1693, "step": 6862 }, { "epoch": 0.6018286475325042, "grad_norm": 0.057373046875, "learning_rate": 0.0013821062882291841, "loss": 1.1415, "step": 6863 }, { "epoch": 0.6019163393068787, "grad_norm": 0.06298828125, "learning_rate": 0.0013817067227643034, "loss": 1.1776, "step": 6864 }, { "epoch": 0.6020040310812533, "grad_norm": 0.07861328125, "learning_rate": 0.0013813071817670769, "loss": 1.1901, "step": 6865 }, { "epoch": 0.6020917228556278, "grad_norm": 0.06640625, "learning_rate": 0.0013809076652739416, "loss": 1.1895, "step": 6866 }, { "epoch": 0.6021794146300024, "grad_norm": 0.09765625, "learning_rate": 0.0013805081733213333, "loss": 1.1916, "step": 6867 }, { "epoch": 0.6022671064043769, "grad_norm": 0.0810546875, "learning_rate": 0.0013801087059456833, "loss": 1.2206, "step": 6868 }, { "epoch": 0.6023547981787515, "grad_norm": 0.05126953125, "learning_rate": 0.001379709263183423, "loss": 1.2115, "step": 6869 }, { "epoch": 0.602442489953126, "grad_norm": 0.052978515625, "learning_rate": 0.0013793098450709798, "loss": 1.1523, "step": 6870 }, { "epoch": 0.6025301817275005, "grad_norm": 0.08203125, "learning_rate": 0.0013789104516447803, "loss": 1.2348, "step": 6871 }, { "epoch": 0.6026178735018751, "grad_norm": 0.0556640625, "learning_rate": 0.0013785110829412479, "loss": 1.1224, "step": 6872 }, { "epoch": 0.6027055652762496, "grad_norm": 0.0693359375, "learning_rate": 0.001378111738996803, "loss": 1.1458, "step": 6873 }, { "epoch": 0.6027932570506241, "grad_norm": 0.1474609375, "learning_rate": 0.0013777124198478667, "loss": 1.1619, "step": 6874 }, { "epoch": 0.6028809488249988, "grad_norm": 0.08642578125, "learning_rate": 0.0013773131255308542, "loss": 1.1659, "step": 6875 }, { "epoch": 0.6029686405993733, "grad_norm": 0.10205078125, "learning_rate": 0.0013769138560821806, "loss": 1.1364, "step": 6876 }, { "epoch": 0.6030563323737478, "grad_norm": 0.1357421875, "learning_rate": 0.0013765146115382587, "loss": 1.1638, "step": 6877 }, { "epoch": 0.6031440241481224, "grad_norm": 0.04931640625, "learning_rate": 0.0013761153919354982, "loss": 1.1799, "step": 6878 }, { "epoch": 0.6032317159224969, "grad_norm": 0.11181640625, "learning_rate": 0.0013757161973103065, "loss": 1.2408, "step": 6879 }, { "epoch": 0.6033194076968714, "grad_norm": 0.06005859375, "learning_rate": 0.0013753170276990898, "loss": 1.2104, "step": 6880 }, { "epoch": 0.603407099471246, "grad_norm": 0.064453125, "learning_rate": 0.0013749178831382514, "loss": 1.167, "step": 6881 }, { "epoch": 0.6034947912456206, "grad_norm": 0.056396484375, "learning_rate": 0.0013745187636641917, "loss": 1.1546, "step": 6882 }, { "epoch": 0.6035824830199951, "grad_norm": 0.056396484375, "learning_rate": 0.0013741196693133092, "loss": 1.1943, "step": 6883 }, { "epoch": 0.6036701747943697, "grad_norm": 0.048828125, "learning_rate": 0.001373720600122001, "loss": 1.1805, "step": 6884 }, { "epoch": 0.6037578665687442, "grad_norm": 0.047607421875, "learning_rate": 0.001373321556126661, "loss": 1.1129, "step": 6885 }, { "epoch": 0.6038455583431187, "grad_norm": 0.051025390625, "learning_rate": 0.001372922537363681, "loss": 1.1487, "step": 6886 }, { "epoch": 0.6039332501174933, "grad_norm": 0.06298828125, "learning_rate": 0.0013725235438694498, "loss": 1.2471, "step": 6887 }, { "epoch": 0.6040209418918678, "grad_norm": 0.051513671875, "learning_rate": 0.0013721245756803558, "loss": 1.184, "step": 6888 }, { "epoch": 0.6041086336662423, "grad_norm": 0.0986328125, "learning_rate": 0.0013717256328327828, "loss": 1.2545, "step": 6889 }, { "epoch": 0.604196325440617, "grad_norm": 0.0556640625, "learning_rate": 0.0013713267153631135, "loss": 1.1896, "step": 6890 }, { "epoch": 0.6042840172149915, "grad_norm": 0.10888671875, "learning_rate": 0.0013709278233077287, "loss": 1.127, "step": 6891 }, { "epoch": 0.604371708989366, "grad_norm": 0.07763671875, "learning_rate": 0.0013705289567030056, "loss": 1.2185, "step": 6892 }, { "epoch": 0.6044594007637405, "grad_norm": 0.08056640625, "learning_rate": 0.0013701301155853202, "loss": 1.1755, "step": 6893 }, { "epoch": 0.6045470925381151, "grad_norm": 0.07275390625, "learning_rate": 0.0013697312999910464, "loss": 1.2099, "step": 6894 }, { "epoch": 0.6046347843124896, "grad_norm": 0.052734375, "learning_rate": 0.001369332509956554, "loss": 1.3204, "step": 6895 }, { "epoch": 0.6047224760868641, "grad_norm": 0.055908203125, "learning_rate": 0.0013689337455182122, "loss": 1.1841, "step": 6896 }, { "epoch": 0.6048101678612388, "grad_norm": 0.06298828125, "learning_rate": 0.0013685350067123873, "loss": 1.138, "step": 6897 }, { "epoch": 0.6048978596356133, "grad_norm": 0.052001953125, "learning_rate": 0.0013681362935754435, "loss": 1.1567, "step": 6898 }, { "epoch": 0.6049855514099878, "grad_norm": 0.060546875, "learning_rate": 0.0013677376061437417, "loss": 1.1845, "step": 6899 }, { "epoch": 0.6050732431843624, "grad_norm": 0.060546875, "learning_rate": 0.0013673389444536417, "loss": 1.2169, "step": 6900 }, { "epoch": 0.6051609349587369, "grad_norm": 0.0517578125, "learning_rate": 0.0013669403085415, "loss": 1.2568, "step": 6901 }, { "epoch": 0.6052486267331114, "grad_norm": 0.080078125, "learning_rate": 0.0013665416984436718, "loss": 1.1579, "step": 6902 }, { "epoch": 0.605336318507486, "grad_norm": 0.052490234375, "learning_rate": 0.0013661431141965088, "loss": 1.1972, "step": 6903 }, { "epoch": 0.6054240102818605, "grad_norm": 0.059326171875, "learning_rate": 0.0013657445558363612, "loss": 1.1843, "step": 6904 }, { "epoch": 0.6055117020562351, "grad_norm": 0.072265625, "learning_rate": 0.0013653460233995767, "loss": 1.1634, "step": 6905 }, { "epoch": 0.6055993938306097, "grad_norm": 0.04833984375, "learning_rate": 0.0013649475169224992, "loss": 1.1354, "step": 6906 }, { "epoch": 0.6056870856049842, "grad_norm": 0.1005859375, "learning_rate": 0.0013645490364414727, "loss": 1.1912, "step": 6907 }, { "epoch": 0.6057747773793587, "grad_norm": 0.054931640625, "learning_rate": 0.001364150581992837, "loss": 1.1604, "step": 6908 }, { "epoch": 0.6058624691537333, "grad_norm": 0.053466796875, "learning_rate": 0.0013637521536129303, "loss": 1.2345, "step": 6909 }, { "epoch": 0.6059501609281078, "grad_norm": 0.0771484375, "learning_rate": 0.0013633537513380886, "loss": 1.1847, "step": 6910 }, { "epoch": 0.6060378527024823, "grad_norm": 0.052001953125, "learning_rate": 0.0013629553752046446, "loss": 1.325, "step": 6911 }, { "epoch": 0.606125544476857, "grad_norm": 0.052978515625, "learning_rate": 0.0013625570252489292, "loss": 1.1994, "step": 6912 }, { "epoch": 0.6062132362512315, "grad_norm": 0.052001953125, "learning_rate": 0.0013621587015072714, "loss": 1.1844, "step": 6913 }, { "epoch": 0.606300928025606, "grad_norm": 0.048095703125, "learning_rate": 0.0013617604040159967, "loss": 1.1989, "step": 6914 }, { "epoch": 0.6063886197999805, "grad_norm": 0.052734375, "learning_rate": 0.0013613621328114292, "loss": 1.1998, "step": 6915 }, { "epoch": 0.6064763115743551, "grad_norm": 0.049560546875, "learning_rate": 0.0013609638879298897, "loss": 1.149, "step": 6916 }, { "epoch": 0.6065640033487296, "grad_norm": 0.055908203125, "learning_rate": 0.0013605656694076975, "loss": 1.2007, "step": 6917 }, { "epoch": 0.6066516951231041, "grad_norm": 0.0537109375, "learning_rate": 0.0013601674772811697, "loss": 1.1936, "step": 6918 }, { "epoch": 0.6067393868974787, "grad_norm": 0.052001953125, "learning_rate": 0.001359769311586619, "loss": 1.1875, "step": 6919 }, { "epoch": 0.6068270786718533, "grad_norm": 0.05419921875, "learning_rate": 0.0013593711723603585, "loss": 1.1496, "step": 6920 }, { "epoch": 0.6069147704462278, "grad_norm": 0.049072265625, "learning_rate": 0.0013589730596386964, "loss": 1.2029, "step": 6921 }, { "epoch": 0.6070024622206024, "grad_norm": 0.07275390625, "learning_rate": 0.001358574973457941, "loss": 1.2065, "step": 6922 }, { "epoch": 0.6070901539949769, "grad_norm": 0.053466796875, "learning_rate": 0.0013581769138543947, "loss": 1.2143, "step": 6923 }, { "epoch": 0.6071778457693514, "grad_norm": 0.050048828125, "learning_rate": 0.0013577788808643606, "loss": 1.1921, "step": 6924 }, { "epoch": 0.607265537543726, "grad_norm": 0.0615234375, "learning_rate": 0.0013573808745241385, "loss": 1.2017, "step": 6925 }, { "epoch": 0.6073532293181005, "grad_norm": 0.056884765625, "learning_rate": 0.001356982894870025, "loss": 1.1938, "step": 6926 }, { "epoch": 0.607440921092475, "grad_norm": 0.047607421875, "learning_rate": 0.0013565849419383152, "loss": 1.1738, "step": 6927 }, { "epoch": 0.6075286128668497, "grad_norm": 0.047119140625, "learning_rate": 0.0013561870157653012, "loss": 1.1541, "step": 6928 }, { "epoch": 0.6076163046412242, "grad_norm": 0.053466796875, "learning_rate": 0.001355789116387273, "loss": 1.1253, "step": 6929 }, { "epoch": 0.6077039964155987, "grad_norm": 0.049072265625, "learning_rate": 0.0013553912438405175, "loss": 1.1673, "step": 6930 }, { "epoch": 0.6077916881899733, "grad_norm": 0.07080078125, "learning_rate": 0.0013549933981613202, "loss": 1.173, "step": 6931 }, { "epoch": 0.6078793799643478, "grad_norm": 0.052734375, "learning_rate": 0.0013545955793859636, "loss": 1.2629, "step": 6932 }, { "epoch": 0.6079670717387223, "grad_norm": 0.0634765625, "learning_rate": 0.0013541977875507277, "loss": 1.2126, "step": 6933 }, { "epoch": 0.608054763513097, "grad_norm": 0.0576171875, "learning_rate": 0.0013538000226918894, "loss": 1.1904, "step": 6934 }, { "epoch": 0.6081424552874715, "grad_norm": 0.05908203125, "learning_rate": 0.0013534022848457252, "loss": 1.1339, "step": 6935 }, { "epoch": 0.608230147061846, "grad_norm": 0.064453125, "learning_rate": 0.001353004574048506, "loss": 1.2407, "step": 6936 }, { "epoch": 0.6083178388362206, "grad_norm": 0.0498046875, "learning_rate": 0.0013526068903365039, "loss": 1.1705, "step": 6937 }, { "epoch": 0.6084055306105951, "grad_norm": 0.07275390625, "learning_rate": 0.001352209233745985, "loss": 1.1829, "step": 6938 }, { "epoch": 0.6084932223849696, "grad_norm": 0.09033203125, "learning_rate": 0.001351811604313216, "loss": 1.2244, "step": 6939 }, { "epoch": 0.6085809141593441, "grad_norm": 0.08056640625, "learning_rate": 0.0013514140020744587, "loss": 1.145, "step": 6940 }, { "epoch": 0.6086686059337187, "grad_norm": 0.08984375, "learning_rate": 0.0013510164270659729, "loss": 1.1979, "step": 6941 }, { "epoch": 0.6087562977080933, "grad_norm": 0.04833984375, "learning_rate": 0.001350618879324018, "loss": 1.1216, "step": 6942 }, { "epoch": 0.6088439894824678, "grad_norm": 0.04833984375, "learning_rate": 0.0013502213588848476, "loss": 1.1368, "step": 6943 }, { "epoch": 0.6089316812568424, "grad_norm": 0.0634765625, "learning_rate": 0.001349823865784716, "loss": 1.2527, "step": 6944 }, { "epoch": 0.6090193730312169, "grad_norm": 0.08935546875, "learning_rate": 0.0013494264000598725, "loss": 1.2081, "step": 6945 }, { "epoch": 0.6091070648055914, "grad_norm": 0.0498046875, "learning_rate": 0.0013490289617465656, "loss": 1.1694, "step": 6946 }, { "epoch": 0.609194756579966, "grad_norm": 0.09912109375, "learning_rate": 0.0013486315508810407, "loss": 1.1969, "step": 6947 }, { "epoch": 0.6092824483543405, "grad_norm": 0.05517578125, "learning_rate": 0.00134823416749954, "loss": 1.2063, "step": 6948 }, { "epoch": 0.609370140128715, "grad_norm": 0.06982421875, "learning_rate": 0.0013478368116383044, "loss": 1.1709, "step": 6949 }, { "epoch": 0.6094578319030897, "grad_norm": 0.06396484375, "learning_rate": 0.0013474394833335715, "loss": 1.1013, "step": 6950 }, { "epoch": 0.6095455236774642, "grad_norm": 0.05810546875, "learning_rate": 0.001347042182621577, "loss": 1.2187, "step": 6951 }, { "epoch": 0.6096332154518387, "grad_norm": 0.05810546875, "learning_rate": 0.001346644909538553, "loss": 1.2124, "step": 6952 }, { "epoch": 0.6097209072262133, "grad_norm": 0.06640625, "learning_rate": 0.00134624766412073, "loss": 1.2071, "step": 6953 }, { "epoch": 0.6098085990005878, "grad_norm": 0.055419921875, "learning_rate": 0.0013458504464043358, "loss": 1.1592, "step": 6954 }, { "epoch": 0.6098962907749623, "grad_norm": 0.06884765625, "learning_rate": 0.0013454532564255966, "loss": 1.2157, "step": 6955 }, { "epoch": 0.609983982549337, "grad_norm": 0.05126953125, "learning_rate": 0.001345056094220734, "loss": 1.1858, "step": 6956 }, { "epoch": 0.6100716743237115, "grad_norm": 0.06396484375, "learning_rate": 0.0013446589598259677, "loss": 1.164, "step": 6957 }, { "epoch": 0.610159366098086, "grad_norm": 0.050048828125, "learning_rate": 0.0013442618532775168, "loss": 1.1664, "step": 6958 }, { "epoch": 0.6102470578724606, "grad_norm": 0.049560546875, "learning_rate": 0.0013438647746115947, "loss": 1.191, "step": 6959 }, { "epoch": 0.6103347496468351, "grad_norm": 0.052490234375, "learning_rate": 0.0013434677238644158, "loss": 1.209, "step": 6960 }, { "epoch": 0.6104224414212096, "grad_norm": 0.046875, "learning_rate": 0.0013430707010721884, "loss": 1.2071, "step": 6961 }, { "epoch": 0.6105101331955842, "grad_norm": 0.047119140625, "learning_rate": 0.0013426737062711212, "loss": 1.2008, "step": 6962 }, { "epoch": 0.6105978249699587, "grad_norm": 0.047607421875, "learning_rate": 0.0013422767394974183, "loss": 1.2284, "step": 6963 }, { "epoch": 0.6106855167443332, "grad_norm": 0.0517578125, "learning_rate": 0.001341879800787283, "loss": 1.2023, "step": 6964 }, { "epoch": 0.6107732085187078, "grad_norm": 0.047607421875, "learning_rate": 0.001341482890176914, "loss": 1.1518, "step": 6965 }, { "epoch": 0.6108609002930824, "grad_norm": 0.05322265625, "learning_rate": 0.0013410860077025088, "loss": 1.1607, "step": 6966 }, { "epoch": 0.6109485920674569, "grad_norm": 0.052734375, "learning_rate": 0.0013406891534002625, "loss": 1.1731, "step": 6967 }, { "epoch": 0.6110362838418314, "grad_norm": 0.052490234375, "learning_rate": 0.0013402923273063672, "loss": 1.1497, "step": 6968 }, { "epoch": 0.611123975616206, "grad_norm": 0.053466796875, "learning_rate": 0.0013398955294570115, "loss": 1.2441, "step": 6969 }, { "epoch": 0.6112116673905805, "grad_norm": 0.068359375, "learning_rate": 0.0013394987598883833, "loss": 1.165, "step": 6970 }, { "epoch": 0.611299359164955, "grad_norm": 0.06884765625, "learning_rate": 0.0013391020186366666, "loss": 1.1901, "step": 6971 }, { "epoch": 0.6113870509393297, "grad_norm": 0.08984375, "learning_rate": 0.0013387053057380433, "loss": 1.2479, "step": 6972 }, { "epoch": 0.6114747427137042, "grad_norm": 0.0712890625, "learning_rate": 0.0013383086212286931, "loss": 1.1971, "step": 6973 }, { "epoch": 0.6115624344880787, "grad_norm": 0.0859375, "learning_rate": 0.0013379119651447916, "loss": 1.2014, "step": 6974 }, { "epoch": 0.6116501262624533, "grad_norm": 0.055419921875, "learning_rate": 0.0013375153375225126, "loss": 1.1978, "step": 6975 }, { "epoch": 0.6117378180368278, "grad_norm": 0.0869140625, "learning_rate": 0.001337118738398029, "loss": 1.1681, "step": 6976 }, { "epoch": 0.6118255098112023, "grad_norm": 0.07763671875, "learning_rate": 0.001336722167807508, "loss": 1.2362, "step": 6977 }, { "epoch": 0.6119132015855769, "grad_norm": 0.05029296875, "learning_rate": 0.001336325625787117, "loss": 1.1819, "step": 6978 }, { "epoch": 0.6120008933599514, "grad_norm": 0.064453125, "learning_rate": 0.0013359291123730191, "loss": 1.1541, "step": 6979 }, { "epoch": 0.612088585134326, "grad_norm": 0.0654296875, "learning_rate": 0.0013355326276013756, "loss": 1.2272, "step": 6980 }, { "epoch": 0.6121762769087006, "grad_norm": 0.052490234375, "learning_rate": 0.0013351361715083442, "loss": 1.1874, "step": 6981 }, { "epoch": 0.6122639686830751, "grad_norm": 0.0654296875, "learning_rate": 0.0013347397441300815, "loss": 1.1635, "step": 6982 }, { "epoch": 0.6123516604574496, "grad_norm": 0.053466796875, "learning_rate": 0.0013343433455027401, "loss": 1.1772, "step": 6983 }, { "epoch": 0.6124393522318242, "grad_norm": 0.057373046875, "learning_rate": 0.0013339469756624709, "loss": 1.175, "step": 6984 }, { "epoch": 0.6125270440061987, "grad_norm": 0.0625, "learning_rate": 0.0013335506346454213, "loss": 1.2408, "step": 6985 }, { "epoch": 0.6126147357805732, "grad_norm": 0.05126953125, "learning_rate": 0.001333154322487737, "loss": 1.2149, "step": 6986 }, { "epoch": 0.6127024275549477, "grad_norm": 0.06005859375, "learning_rate": 0.001332758039225561, "loss": 1.1185, "step": 6987 }, { "epoch": 0.6127901193293224, "grad_norm": 0.060791015625, "learning_rate": 0.0013323617848950327, "loss": 1.2158, "step": 6988 }, { "epoch": 0.6128778111036969, "grad_norm": 0.0537109375, "learning_rate": 0.001331965559532289, "loss": 1.2139, "step": 6989 }, { "epoch": 0.6129655028780714, "grad_norm": 0.10693359375, "learning_rate": 0.0013315693631734665, "loss": 1.2748, "step": 6990 }, { "epoch": 0.613053194652446, "grad_norm": 0.05517578125, "learning_rate": 0.0013311731958546957, "loss": 1.185, "step": 6991 }, { "epoch": 0.6131408864268205, "grad_norm": 0.09423828125, "learning_rate": 0.0013307770576121056, "loss": 1.1648, "step": 6992 }, { "epoch": 0.613228578201195, "grad_norm": 0.09033203125, "learning_rate": 0.0013303809484818243, "loss": 1.1399, "step": 6993 }, { "epoch": 0.6133162699755697, "grad_norm": 0.0537109375, "learning_rate": 0.001329984868499975, "loss": 1.177, "step": 6994 }, { "epoch": 0.6134039617499442, "grad_norm": 0.051025390625, "learning_rate": 0.0013295888177026803, "loss": 1.1577, "step": 6995 }, { "epoch": 0.6134916535243187, "grad_norm": 0.07275390625, "learning_rate": 0.0013291927961260577, "loss": 1.1603, "step": 6996 }, { "epoch": 0.6135793452986933, "grad_norm": 0.0517578125, "learning_rate": 0.0013287968038062242, "loss": 1.1566, "step": 6997 }, { "epoch": 0.6136670370730678, "grad_norm": 0.049560546875, "learning_rate": 0.0013284008407792927, "loss": 1.1418, "step": 6998 }, { "epoch": 0.6137547288474423, "grad_norm": 0.0869140625, "learning_rate": 0.0013280049070813745, "loss": 1.2057, "step": 6999 }, { "epoch": 0.6138424206218169, "grad_norm": 0.049072265625, "learning_rate": 0.001327609002748578, "loss": 1.1716, "step": 7000 }, { "epoch": 0.6138424206218169, "eval_loss": 1.1943082809448242, "eval_runtime": 429.3829, "eval_samples_per_second": 33.646, "eval_steps_per_second": 8.412, "step": 7000 }, { "epoch": 0.6139301123961914, "grad_norm": 0.053955078125, "learning_rate": 0.0013272131278170075, "loss": 1.1954, "step": 7001 }, { "epoch": 0.614017804170566, "grad_norm": 0.047607421875, "learning_rate": 0.001326817282322767, "loss": 1.1842, "step": 7002 }, { "epoch": 0.6141054959449406, "grad_norm": 0.0615234375, "learning_rate": 0.0013264214663019558, "loss": 1.1614, "step": 7003 }, { "epoch": 0.6141931877193151, "grad_norm": 0.051025390625, "learning_rate": 0.0013260256797906714, "loss": 1.1849, "step": 7004 }, { "epoch": 0.6142808794936896, "grad_norm": 0.057373046875, "learning_rate": 0.001325629922825009, "loss": 1.147, "step": 7005 }, { "epoch": 0.6143685712680642, "grad_norm": 0.056640625, "learning_rate": 0.00132523419544106, "loss": 1.1804, "step": 7006 }, { "epoch": 0.6144562630424387, "grad_norm": 0.046630859375, "learning_rate": 0.0013248384976749146, "loss": 1.1502, "step": 7007 }, { "epoch": 0.6145439548168132, "grad_norm": 0.04931640625, "learning_rate": 0.0013244428295626585, "loss": 1.1608, "step": 7008 }, { "epoch": 0.6146316465911879, "grad_norm": 0.047607421875, "learning_rate": 0.001324047191140376, "loss": 1.1679, "step": 7009 }, { "epoch": 0.6147193383655624, "grad_norm": 0.04638671875, "learning_rate": 0.0013236515824441475, "loss": 1.1446, "step": 7010 }, { "epoch": 0.6148070301399369, "grad_norm": 0.0703125, "learning_rate": 0.001323256003510053, "loss": 1.1574, "step": 7011 }, { "epoch": 0.6148947219143114, "grad_norm": 0.050537109375, "learning_rate": 0.001322860454374167, "loss": 1.2559, "step": 7012 }, { "epoch": 0.614982413688686, "grad_norm": 0.059326171875, "learning_rate": 0.0013224649350725632, "loss": 1.1603, "step": 7013 }, { "epoch": 0.6150701054630605, "grad_norm": 0.0703125, "learning_rate": 0.0013220694456413118, "loss": 1.1943, "step": 7014 }, { "epoch": 0.615157797237435, "grad_norm": 0.056884765625, "learning_rate": 0.0013216739861164808, "loss": 1.1986, "step": 7015 }, { "epoch": 0.6152454890118096, "grad_norm": 0.0849609375, "learning_rate": 0.001321278556534134, "loss": 1.1978, "step": 7016 }, { "epoch": 0.6153331807861842, "grad_norm": 0.072265625, "learning_rate": 0.0013208831569303347, "loss": 1.2111, "step": 7017 }, { "epoch": 0.6154208725605587, "grad_norm": 0.052490234375, "learning_rate": 0.001320487787341142, "loss": 1.2846, "step": 7018 }, { "epoch": 0.6155085643349333, "grad_norm": 0.0712890625, "learning_rate": 0.001320092447802612, "loss": 1.1766, "step": 7019 }, { "epoch": 0.6155962561093078, "grad_norm": 0.060791015625, "learning_rate": 0.0013196971383507992, "loss": 1.1664, "step": 7020 }, { "epoch": 0.6156839478836823, "grad_norm": 0.06201171875, "learning_rate": 0.0013193018590217551, "loss": 1.2146, "step": 7021 }, { "epoch": 0.6157716396580569, "grad_norm": 0.05908203125, "learning_rate": 0.0013189066098515275, "loss": 1.2423, "step": 7022 }, { "epoch": 0.6158593314324314, "grad_norm": 0.052734375, "learning_rate": 0.0013185113908761627, "loss": 1.169, "step": 7023 }, { "epoch": 0.6159470232068059, "grad_norm": 0.059814453125, "learning_rate": 0.0013181162021317033, "loss": 1.1908, "step": 7024 }, { "epoch": 0.6160347149811806, "grad_norm": 0.05126953125, "learning_rate": 0.001317721043654189, "loss": 1.2204, "step": 7025 }, { "epoch": 0.6161224067555551, "grad_norm": 0.049560546875, "learning_rate": 0.001317325915479658, "loss": 1.1797, "step": 7026 }, { "epoch": 0.6162100985299296, "grad_norm": 0.06298828125, "learning_rate": 0.001316930817644145, "loss": 1.1451, "step": 7027 }, { "epoch": 0.6162977903043042, "grad_norm": 0.053955078125, "learning_rate": 0.001316535750183681, "loss": 1.1629, "step": 7028 }, { "epoch": 0.6163854820786787, "grad_norm": 0.046875, "learning_rate": 0.0013161407131342961, "loss": 1.1312, "step": 7029 }, { "epoch": 0.6164731738530532, "grad_norm": 0.049560546875, "learning_rate": 0.0013157457065320162, "loss": 1.1795, "step": 7030 }, { "epoch": 0.6165608656274278, "grad_norm": 0.048095703125, "learning_rate": 0.0013153507304128652, "loss": 1.1211, "step": 7031 }, { "epoch": 0.6166485574018024, "grad_norm": 0.052734375, "learning_rate": 0.0013149557848128634, "loss": 1.2982, "step": 7032 }, { "epoch": 0.6167362491761769, "grad_norm": 0.052734375, "learning_rate": 0.0013145608697680295, "loss": 1.166, "step": 7033 }, { "epoch": 0.6168239409505515, "grad_norm": 0.053955078125, "learning_rate": 0.001314165985314378, "loss": 1.1965, "step": 7034 }, { "epoch": 0.616911632724926, "grad_norm": 0.055908203125, "learning_rate": 0.0013137711314879217, "loss": 1.1536, "step": 7035 }, { "epoch": 0.6169993244993005, "grad_norm": 0.06982421875, "learning_rate": 0.0013133763083246703, "loss": 1.272, "step": 7036 }, { "epoch": 0.617087016273675, "grad_norm": 0.11376953125, "learning_rate": 0.0013129815158606309, "loss": 1.2778, "step": 7037 }, { "epoch": 0.6171747080480496, "grad_norm": 0.07080078125, "learning_rate": 0.0013125867541318068, "loss": 1.1879, "step": 7038 }, { "epoch": 0.6172623998224241, "grad_norm": 0.06640625, "learning_rate": 0.0013121920231741998, "loss": 1.1608, "step": 7039 }, { "epoch": 0.6173500915967987, "grad_norm": 0.10791015625, "learning_rate": 0.0013117973230238084, "loss": 1.2586, "step": 7040 }, { "epoch": 0.6174377833711733, "grad_norm": 0.057373046875, "learning_rate": 0.0013114026537166283, "loss": 1.2589, "step": 7041 }, { "epoch": 0.6175254751455478, "grad_norm": 0.1318359375, "learning_rate": 0.001311008015288652, "loss": 1.2002, "step": 7042 }, { "epoch": 0.6176131669199223, "grad_norm": 0.126953125, "learning_rate": 0.001310613407775869, "loss": 1.1801, "step": 7043 }, { "epoch": 0.6177008586942969, "grad_norm": 0.07275390625, "learning_rate": 0.001310218831214268, "loss": 1.1792, "step": 7044 }, { "epoch": 0.6177885504686714, "grad_norm": 0.11669921875, "learning_rate": 0.0013098242856398315, "loss": 1.1476, "step": 7045 }, { "epoch": 0.6178762422430459, "grad_norm": 0.08984375, "learning_rate": 0.0013094297710885424, "loss": 1.1533, "step": 7046 }, { "epoch": 0.6179639340174206, "grad_norm": 0.048828125, "learning_rate": 0.001309035287596379, "loss": 1.2536, "step": 7047 }, { "epoch": 0.6180516257917951, "grad_norm": 0.068359375, "learning_rate": 0.0013086408351993175, "loss": 1.1333, "step": 7048 }, { "epoch": 0.6181393175661696, "grad_norm": 0.10009765625, "learning_rate": 0.0013082464139333298, "loss": 1.2078, "step": 7049 }, { "epoch": 0.6182270093405442, "grad_norm": 0.0703125, "learning_rate": 0.001307852023834388, "loss": 1.179, "step": 7050 }, { "epoch": 0.6183147011149187, "grad_norm": 0.07177734375, "learning_rate": 0.0013074576649384575, "loss": 1.212, "step": 7051 }, { "epoch": 0.6184023928892932, "grad_norm": 0.09765625, "learning_rate": 0.0013070633372815037, "loss": 1.1662, "step": 7052 }, { "epoch": 0.6184900846636678, "grad_norm": 0.09033203125, "learning_rate": 0.0013066690408994888, "loss": 1.1886, "step": 7053 }, { "epoch": 0.6185777764380423, "grad_norm": 0.04931640625, "learning_rate": 0.0013062747758283706, "loss": 1.1748, "step": 7054 }, { "epoch": 0.6186654682124169, "grad_norm": 0.08251953125, "learning_rate": 0.001305880542104106, "loss": 1.2238, "step": 7055 }, { "epoch": 0.6187531599867915, "grad_norm": 0.0947265625, "learning_rate": 0.0013054863397626473, "loss": 1.2607, "step": 7056 }, { "epoch": 0.618840851761166, "grad_norm": 0.06640625, "learning_rate": 0.0013050921688399452, "loss": 1.1707, "step": 7057 }, { "epoch": 0.6189285435355405, "grad_norm": 0.0498046875, "learning_rate": 0.0013046980293719471, "loss": 1.19, "step": 7058 }, { "epoch": 0.619016235309915, "grad_norm": 0.07958984375, "learning_rate": 0.001304303921394597, "loss": 1.1346, "step": 7059 }, { "epoch": 0.6191039270842896, "grad_norm": 0.07080078125, "learning_rate": 0.0013039098449438371, "loss": 1.2193, "step": 7060 }, { "epoch": 0.6191916188586641, "grad_norm": 0.0546875, "learning_rate": 0.0013035158000556056, "loss": 1.2072, "step": 7061 }, { "epoch": 0.6192793106330386, "grad_norm": 0.0498046875, "learning_rate": 0.0013031217867658389, "loss": 1.2151, "step": 7062 }, { "epoch": 0.6193670024074133, "grad_norm": 0.0546875, "learning_rate": 0.0013027278051104694, "loss": 1.1625, "step": 7063 }, { "epoch": 0.6194546941817878, "grad_norm": 0.06787109375, "learning_rate": 0.0013023338551254283, "loss": 1.1752, "step": 7064 }, { "epoch": 0.6195423859561623, "grad_norm": 0.08837890625, "learning_rate": 0.0013019399368466415, "loss": 1.1936, "step": 7065 }, { "epoch": 0.6196300777305369, "grad_norm": 0.055419921875, "learning_rate": 0.0013015460503100346, "loss": 1.238, "step": 7066 }, { "epoch": 0.6197177695049114, "grad_norm": 0.07080078125, "learning_rate": 0.0013011521955515275, "loss": 1.2182, "step": 7067 }, { "epoch": 0.6198054612792859, "grad_norm": 0.07861328125, "learning_rate": 0.0013007583726070402, "loss": 1.2513, "step": 7068 }, { "epoch": 0.6198931530536606, "grad_norm": 0.05615234375, "learning_rate": 0.0013003645815124875, "loss": 1.2442, "step": 7069 }, { "epoch": 0.6199808448280351, "grad_norm": 0.06982421875, "learning_rate": 0.0012999708223037827, "loss": 1.1717, "step": 7070 }, { "epoch": 0.6200685366024096, "grad_norm": 0.08740234375, "learning_rate": 0.001299577095016835, "loss": 1.1804, "step": 7071 }, { "epoch": 0.6201562283767842, "grad_norm": 0.0673828125, "learning_rate": 0.001299183399687552, "loss": 1.238, "step": 7072 }, { "epoch": 0.6202439201511587, "grad_norm": 0.046875, "learning_rate": 0.0012987897363518368, "loss": 1.1116, "step": 7073 }, { "epoch": 0.6203316119255332, "grad_norm": 0.0673828125, "learning_rate": 0.0012983961050455916, "loss": 1.1985, "step": 7074 }, { "epoch": 0.6204193036999078, "grad_norm": 0.056884765625, "learning_rate": 0.0012980025058047138, "loss": 1.2115, "step": 7075 }, { "epoch": 0.6205069954742823, "grad_norm": 0.060791015625, "learning_rate": 0.0012976089386650983, "loss": 1.1865, "step": 7076 }, { "epoch": 0.6205946872486569, "grad_norm": 0.0517578125, "learning_rate": 0.0012972154036626385, "loss": 1.1451, "step": 7077 }, { "epoch": 0.6206823790230315, "grad_norm": 0.048828125, "learning_rate": 0.0012968219008332228, "loss": 1.2504, "step": 7078 }, { "epoch": 0.620770070797406, "grad_norm": 0.05078125, "learning_rate": 0.0012964284302127381, "loss": 1.2345, "step": 7079 }, { "epoch": 0.6208577625717805, "grad_norm": 0.05224609375, "learning_rate": 0.0012960349918370677, "loss": 1.1494, "step": 7080 }, { "epoch": 0.6209454543461551, "grad_norm": 0.05322265625, "learning_rate": 0.0012956415857420924, "loss": 1.1921, "step": 7081 }, { "epoch": 0.6210331461205296, "grad_norm": 0.062255859375, "learning_rate": 0.00129524821196369, "loss": 1.1698, "step": 7082 }, { "epoch": 0.6211208378949041, "grad_norm": 0.05517578125, "learning_rate": 0.0012948548705377344, "loss": 1.1202, "step": 7083 }, { "epoch": 0.6212085296692786, "grad_norm": 0.060546875, "learning_rate": 0.0012944615615000984, "loss": 1.2189, "step": 7084 }, { "epoch": 0.6212962214436533, "grad_norm": 0.05322265625, "learning_rate": 0.0012940682848866494, "loss": 1.1637, "step": 7085 }, { "epoch": 0.6213839132180278, "grad_norm": 0.12158203125, "learning_rate": 0.0012936750407332544, "loss": 1.1967, "step": 7086 }, { "epoch": 0.6214716049924023, "grad_norm": 0.109375, "learning_rate": 0.0012932818290757758, "loss": 1.1743, "step": 7087 }, { "epoch": 0.6215592967667769, "grad_norm": 0.050048828125, "learning_rate": 0.0012928886499500736, "loss": 1.1689, "step": 7088 }, { "epoch": 0.6216469885411514, "grad_norm": 0.1484375, "learning_rate": 0.0012924955033920046, "loss": 1.1762, "step": 7089 }, { "epoch": 0.6217346803155259, "grad_norm": 0.07275390625, "learning_rate": 0.001292102389437423, "loss": 1.2016, "step": 7090 }, { "epoch": 0.6218223720899005, "grad_norm": 0.0546875, "learning_rate": 0.001291709308122179, "loss": 1.1593, "step": 7091 }, { "epoch": 0.621910063864275, "grad_norm": 0.080078125, "learning_rate": 0.001291316259482122, "loss": 1.1987, "step": 7092 }, { "epoch": 0.6219977556386496, "grad_norm": 0.07373046875, "learning_rate": 0.001290923243553096, "loss": 1.1814, "step": 7093 }, { "epoch": 0.6220854474130242, "grad_norm": 0.061767578125, "learning_rate": 0.0012905302603709427, "loss": 1.2289, "step": 7094 }, { "epoch": 0.6221731391873987, "grad_norm": 0.08935546875, "learning_rate": 0.001290137309971502, "loss": 1.2446, "step": 7095 }, { "epoch": 0.6222608309617732, "grad_norm": 0.061279296875, "learning_rate": 0.0012897443923906096, "loss": 1.1575, "step": 7096 }, { "epoch": 0.6223485227361478, "grad_norm": 0.0673828125, "learning_rate": 0.001289351507664099, "loss": 1.1977, "step": 7097 }, { "epoch": 0.6224362145105223, "grad_norm": 0.051025390625, "learning_rate": 0.0012889586558277992, "loss": 1.1484, "step": 7098 }, { "epoch": 0.6225239062848968, "grad_norm": 0.05224609375, "learning_rate": 0.0012885658369175383, "loss": 1.1695, "step": 7099 }, { "epoch": 0.6226115980592715, "grad_norm": 0.0673828125, "learning_rate": 0.0012881730509691402, "loss": 1.2548, "step": 7100 }, { "epoch": 0.622699289833646, "grad_norm": 0.0634765625, "learning_rate": 0.0012877802980184256, "loss": 1.1961, "step": 7101 }, { "epoch": 0.6227869816080205, "grad_norm": 0.07080078125, "learning_rate": 0.0012873875781012126, "loss": 1.1626, "step": 7102 }, { "epoch": 0.6228746733823951, "grad_norm": 0.049560546875, "learning_rate": 0.0012869948912533169, "loss": 1.2474, "step": 7103 }, { "epoch": 0.6229623651567696, "grad_norm": 0.056640625, "learning_rate": 0.0012866022375105494, "loss": 1.2174, "step": 7104 }, { "epoch": 0.6230500569311441, "grad_norm": 0.0517578125, "learning_rate": 0.0012862096169087203, "loss": 1.2153, "step": 7105 }, { "epoch": 0.6231377487055186, "grad_norm": 0.0537109375, "learning_rate": 0.0012858170294836346, "loss": 1.1311, "step": 7106 }, { "epoch": 0.6232254404798933, "grad_norm": 0.07568359375, "learning_rate": 0.0012854244752710956, "loss": 1.1964, "step": 7107 }, { "epoch": 0.6233131322542678, "grad_norm": 0.048095703125, "learning_rate": 0.0012850319543069042, "loss": 1.1532, "step": 7108 }, { "epoch": 0.6234008240286423, "grad_norm": 0.06298828125, "learning_rate": 0.0012846394666268556, "loss": 1.2057, "step": 7109 }, { "epoch": 0.6234885158030169, "grad_norm": 0.05615234375, "learning_rate": 0.0012842470122667443, "loss": 1.2098, "step": 7110 }, { "epoch": 0.6235762075773914, "grad_norm": 0.053466796875, "learning_rate": 0.0012838545912623618, "loss": 1.1697, "step": 7111 }, { "epoch": 0.6236638993517659, "grad_norm": 0.06298828125, "learning_rate": 0.0012834622036494952, "loss": 1.2301, "step": 7112 }, { "epoch": 0.6237515911261405, "grad_norm": 0.046875, "learning_rate": 0.0012830698494639295, "loss": 1.1254, "step": 7113 }, { "epoch": 0.623839282900515, "grad_norm": 0.048095703125, "learning_rate": 0.001282677528741446, "loss": 1.1591, "step": 7114 }, { "epoch": 0.6239269746748896, "grad_norm": 0.07666015625, "learning_rate": 0.0012822852415178244, "loss": 1.1772, "step": 7115 }, { "epoch": 0.6240146664492642, "grad_norm": 0.060546875, "learning_rate": 0.0012818929878288384, "loss": 1.2267, "step": 7116 }, { "epoch": 0.6241023582236387, "grad_norm": 0.061279296875, "learning_rate": 0.0012815007677102624, "loss": 1.2072, "step": 7117 }, { "epoch": 0.6241900499980132, "grad_norm": 0.06201171875, "learning_rate": 0.0012811085811978648, "loss": 1.2647, "step": 7118 }, { "epoch": 0.6242777417723878, "grad_norm": 0.052490234375, "learning_rate": 0.0012807164283274127, "loss": 1.1404, "step": 7119 }, { "epoch": 0.6243654335467623, "grad_norm": 0.05859375, "learning_rate": 0.0012803243091346684, "loss": 1.1742, "step": 7120 }, { "epoch": 0.6244531253211368, "grad_norm": 0.052978515625, "learning_rate": 0.0012799322236553936, "loss": 1.1873, "step": 7121 }, { "epoch": 0.6245408170955115, "grad_norm": 0.0703125, "learning_rate": 0.001279540171925344, "loss": 1.1713, "step": 7122 }, { "epoch": 0.624628508869886, "grad_norm": 0.05712890625, "learning_rate": 0.0012791481539802748, "loss": 1.1951, "step": 7123 }, { "epoch": 0.6247162006442605, "grad_norm": 0.054443359375, "learning_rate": 0.0012787561698559362, "loss": 1.1469, "step": 7124 }, { "epoch": 0.6248038924186351, "grad_norm": 0.09619140625, "learning_rate": 0.001278364219588077, "loss": 1.2831, "step": 7125 }, { "epoch": 0.6248915841930096, "grad_norm": 0.08935546875, "learning_rate": 0.0012779723032124417, "loss": 1.1682, "step": 7126 }, { "epoch": 0.6249792759673841, "grad_norm": 0.0576171875, "learning_rate": 0.0012775804207647717, "loss": 1.1491, "step": 7127 }, { "epoch": 0.6250669677417587, "grad_norm": 0.1259765625, "learning_rate": 0.001277188572280806, "loss": 1.2018, "step": 7128 }, { "epoch": 0.6251546595161332, "grad_norm": 0.08349609375, "learning_rate": 0.00127679675779628, "loss": 1.148, "step": 7129 }, { "epoch": 0.6252423512905078, "grad_norm": 0.04736328125, "learning_rate": 0.001276404977346926, "loss": 1.1189, "step": 7130 }, { "epoch": 0.6253300430648823, "grad_norm": 0.08544921875, "learning_rate": 0.0012760132309684738, "loss": 1.1727, "step": 7131 }, { "epoch": 0.6254177348392569, "grad_norm": 0.10791015625, "learning_rate": 0.0012756215186966497, "loss": 1.2046, "step": 7132 }, { "epoch": 0.6255054266136314, "grad_norm": 0.05126953125, "learning_rate": 0.0012752298405671763, "loss": 1.2025, "step": 7133 }, { "epoch": 0.6255931183880059, "grad_norm": 0.109375, "learning_rate": 0.0012748381966157742, "loss": 1.214, "step": 7134 }, { "epoch": 0.6256808101623805, "grad_norm": 0.068359375, "learning_rate": 0.0012744465868781605, "loss": 1.1423, "step": 7135 }, { "epoch": 0.625768501936755, "grad_norm": 0.052734375, "learning_rate": 0.0012740550113900478, "loss": 1.1227, "step": 7136 }, { "epoch": 0.6258561937111295, "grad_norm": 0.054931640625, "learning_rate": 0.001273663470187148, "loss": 1.1796, "step": 7137 }, { "epoch": 0.6259438854855042, "grad_norm": 0.06884765625, "learning_rate": 0.001273271963305168, "loss": 1.1882, "step": 7138 }, { "epoch": 0.6260315772598787, "grad_norm": 0.0576171875, "learning_rate": 0.0012728804907798127, "loss": 1.1834, "step": 7139 }, { "epoch": 0.6261192690342532, "grad_norm": 0.0576171875, "learning_rate": 0.001272489052646783, "loss": 1.1886, "step": 7140 }, { "epoch": 0.6262069608086278, "grad_norm": 0.05859375, "learning_rate": 0.001272097648941777, "loss": 1.1527, "step": 7141 }, { "epoch": 0.6262946525830023, "grad_norm": 0.0947265625, "learning_rate": 0.0012717062797004905, "loss": 1.2931, "step": 7142 }, { "epoch": 0.6263823443573768, "grad_norm": 0.051513671875, "learning_rate": 0.0012713149449586141, "loss": 1.1513, "step": 7143 }, { "epoch": 0.6264700361317515, "grad_norm": 0.0556640625, "learning_rate": 0.0012709236447518374, "loss": 1.1731, "step": 7144 }, { "epoch": 0.626557727906126, "grad_norm": 0.0625, "learning_rate": 0.0012705323791158455, "loss": 1.2159, "step": 7145 }, { "epoch": 0.6266454196805005, "grad_norm": 0.08203125, "learning_rate": 0.0012701411480863212, "loss": 1.2245, "step": 7146 }, { "epoch": 0.6267331114548751, "grad_norm": 0.05517578125, "learning_rate": 0.0012697499516989435, "loss": 1.2273, "step": 7147 }, { "epoch": 0.6268208032292496, "grad_norm": 0.05322265625, "learning_rate": 0.0012693587899893887, "loss": 1.1768, "step": 7148 }, { "epoch": 0.6269084950036241, "grad_norm": 0.052001953125, "learning_rate": 0.0012689676629933295, "loss": 1.2079, "step": 7149 }, { "epoch": 0.6269961867779987, "grad_norm": 0.058837890625, "learning_rate": 0.001268576570746436, "loss": 1.1682, "step": 7150 }, { "epoch": 0.6270838785523732, "grad_norm": 0.05615234375, "learning_rate": 0.0012681855132843744, "loss": 1.2099, "step": 7151 }, { "epoch": 0.6271715703267478, "grad_norm": 0.052978515625, "learning_rate": 0.0012677944906428085, "loss": 1.1799, "step": 7152 }, { "epoch": 0.6272592621011224, "grad_norm": 0.06298828125, "learning_rate": 0.0012674035028573981, "loss": 1.1845, "step": 7153 }, { "epoch": 0.6273469538754969, "grad_norm": 0.055419921875, "learning_rate": 0.0012670125499638011, "loss": 1.1695, "step": 7154 }, { "epoch": 0.6274346456498714, "grad_norm": 0.053955078125, "learning_rate": 0.0012666216319976702, "loss": 1.156, "step": 7155 }, { "epoch": 0.6275223374242459, "grad_norm": 0.051025390625, "learning_rate": 0.0012662307489946572, "loss": 1.1701, "step": 7156 }, { "epoch": 0.6276100291986205, "grad_norm": 0.06298828125, "learning_rate": 0.0012658399009904092, "loss": 1.1914, "step": 7157 }, { "epoch": 0.627697720972995, "grad_norm": 0.05908203125, "learning_rate": 0.0012654490880205705, "loss": 1.193, "step": 7158 }, { "epoch": 0.6277854127473695, "grad_norm": 0.0693359375, "learning_rate": 0.0012650583101207827, "loss": 1.2349, "step": 7159 }, { "epoch": 0.6278731045217442, "grad_norm": 0.061767578125, "learning_rate": 0.0012646675673266827, "loss": 1.2057, "step": 7160 }, { "epoch": 0.6279607962961187, "grad_norm": 0.08642578125, "learning_rate": 0.001264276859673906, "loss": 1.1036, "step": 7161 }, { "epoch": 0.6280484880704932, "grad_norm": 0.04931640625, "learning_rate": 0.0012638861871980837, "loss": 1.2223, "step": 7162 }, { "epoch": 0.6281361798448678, "grad_norm": 0.06201171875, "learning_rate": 0.0012634955499348447, "loss": 1.1688, "step": 7163 }, { "epoch": 0.6282238716192423, "grad_norm": 0.07568359375, "learning_rate": 0.0012631049479198137, "loss": 1.1538, "step": 7164 }, { "epoch": 0.6283115633936168, "grad_norm": 0.06884765625, "learning_rate": 0.0012627143811886124, "loss": 1.1411, "step": 7165 }, { "epoch": 0.6283992551679914, "grad_norm": 0.05078125, "learning_rate": 0.0012623238497768603, "loss": 1.1848, "step": 7166 }, { "epoch": 0.628486946942366, "grad_norm": 0.050048828125, "learning_rate": 0.0012619333537201717, "loss": 1.1519, "step": 7167 }, { "epoch": 0.6285746387167405, "grad_norm": 0.0478515625, "learning_rate": 0.0012615428930541602, "loss": 1.1472, "step": 7168 }, { "epoch": 0.6286623304911151, "grad_norm": 0.049072265625, "learning_rate": 0.0012611524678144332, "loss": 1.1498, "step": 7169 }, { "epoch": 0.6287500222654896, "grad_norm": 0.052978515625, "learning_rate": 0.001260762078036598, "loss": 1.1969, "step": 7170 }, { "epoch": 0.6288377140398641, "grad_norm": 0.0498046875, "learning_rate": 0.0012603717237562562, "loss": 1.1463, "step": 7171 }, { "epoch": 0.6289254058142387, "grad_norm": 0.05712890625, "learning_rate": 0.0012599814050090075, "loss": 1.2044, "step": 7172 }, { "epoch": 0.6290130975886132, "grad_norm": 0.0712890625, "learning_rate": 0.0012595911218304476, "loss": 1.1973, "step": 7173 }, { "epoch": 0.6291007893629877, "grad_norm": 0.052001953125, "learning_rate": 0.00125920087425617, "loss": 1.1551, "step": 7174 }, { "epoch": 0.6291884811373624, "grad_norm": 0.0654296875, "learning_rate": 0.0012588106623217632, "loss": 1.2081, "step": 7175 }, { "epoch": 0.6292761729117369, "grad_norm": 0.051025390625, "learning_rate": 0.001258420486062815, "loss": 1.1569, "step": 7176 }, { "epoch": 0.6293638646861114, "grad_norm": 0.053955078125, "learning_rate": 0.0012580303455149073, "loss": 1.1521, "step": 7177 }, { "epoch": 0.6294515564604859, "grad_norm": 0.058349609375, "learning_rate": 0.0012576402407136198, "loss": 1.1186, "step": 7178 }, { "epoch": 0.6295392482348605, "grad_norm": 0.053955078125, "learning_rate": 0.0012572501716945298, "loss": 1.1543, "step": 7179 }, { "epoch": 0.629626940009235, "grad_norm": 0.1103515625, "learning_rate": 0.0012568601384932102, "loss": 1.1555, "step": 7180 }, { "epoch": 0.6297146317836095, "grad_norm": 0.050048828125, "learning_rate": 0.0012564701411452311, "loss": 1.2396, "step": 7181 }, { "epoch": 0.6298023235579842, "grad_norm": 0.051513671875, "learning_rate": 0.0012560801796861587, "loss": 1.1973, "step": 7182 }, { "epoch": 0.6298900153323587, "grad_norm": 0.053955078125, "learning_rate": 0.0012556902541515577, "loss": 1.1501, "step": 7183 }, { "epoch": 0.6299777071067332, "grad_norm": 0.048583984375, "learning_rate": 0.0012553003645769873, "loss": 1.1975, "step": 7184 }, { "epoch": 0.6300653988811078, "grad_norm": 0.0546875, "learning_rate": 0.0012549105109980051, "loss": 1.13, "step": 7185 }, { "epoch": 0.6301530906554823, "grad_norm": 0.0751953125, "learning_rate": 0.001254520693450164, "loss": 1.1743, "step": 7186 }, { "epoch": 0.6302407824298568, "grad_norm": 0.048828125, "learning_rate": 0.0012541309119690146, "loss": 1.1171, "step": 7187 }, { "epoch": 0.6303284742042314, "grad_norm": 0.062255859375, "learning_rate": 0.0012537411665901046, "loss": 1.1934, "step": 7188 }, { "epoch": 0.630416165978606, "grad_norm": 0.044189453125, "learning_rate": 0.0012533514573489766, "loss": 1.1125, "step": 7189 }, { "epoch": 0.6305038577529805, "grad_norm": 0.056396484375, "learning_rate": 0.0012529617842811725, "loss": 1.1491, "step": 7190 }, { "epoch": 0.6305915495273551, "grad_norm": 0.0615234375, "learning_rate": 0.001252572147422228, "loss": 1.1801, "step": 7191 }, { "epoch": 0.6306792413017296, "grad_norm": 0.05810546875, "learning_rate": 0.0012521825468076783, "loss": 1.1904, "step": 7192 }, { "epoch": 0.6307669330761041, "grad_norm": 0.0751953125, "learning_rate": 0.001251792982473054, "loss": 1.1457, "step": 7193 }, { "epoch": 0.6308546248504787, "grad_norm": 0.0556640625, "learning_rate": 0.0012514034544538805, "loss": 1.092, "step": 7194 }, { "epoch": 0.6309423166248532, "grad_norm": 0.049560546875, "learning_rate": 0.0012510139627856835, "loss": 1.2241, "step": 7195 }, { "epoch": 0.6310300083992277, "grad_norm": 0.08251953125, "learning_rate": 0.0012506245075039827, "loss": 1.1949, "step": 7196 }, { "epoch": 0.6311177001736024, "grad_norm": 0.0673828125, "learning_rate": 0.0012502350886442963, "loss": 1.2253, "step": 7197 }, { "epoch": 0.6312053919479769, "grad_norm": 0.064453125, "learning_rate": 0.0012498457062421378, "loss": 1.1406, "step": 7198 }, { "epoch": 0.6312930837223514, "grad_norm": 0.0771484375, "learning_rate": 0.0012494563603330175, "loss": 1.2586, "step": 7199 }, { "epoch": 0.631380775496726, "grad_norm": 0.06005859375, "learning_rate": 0.0012490670509524434, "loss": 1.1402, "step": 7200 }, { "epoch": 0.6314684672711005, "grad_norm": 0.055908203125, "learning_rate": 0.0012486777781359195, "loss": 1.2358, "step": 7201 }, { "epoch": 0.631556159045475, "grad_norm": 0.07958984375, "learning_rate": 0.0012482885419189459, "loss": 1.226, "step": 7202 }, { "epoch": 0.6316438508198495, "grad_norm": 0.057373046875, "learning_rate": 0.0012478993423370205, "loss": 1.2072, "step": 7203 }, { "epoch": 0.6317315425942241, "grad_norm": 0.052001953125, "learning_rate": 0.0012475101794256365, "loss": 1.176, "step": 7204 }, { "epoch": 0.6318192343685987, "grad_norm": 0.061767578125, "learning_rate": 0.0012471210532202857, "loss": 1.2061, "step": 7205 }, { "epoch": 0.6319069261429732, "grad_norm": 0.0537109375, "learning_rate": 0.0012467319637564543, "loss": 1.1781, "step": 7206 }, { "epoch": 0.6319946179173478, "grad_norm": 0.05078125, "learning_rate": 0.0012463429110696272, "loss": 1.1745, "step": 7207 }, { "epoch": 0.6320823096917223, "grad_norm": 0.046630859375, "learning_rate": 0.0012459538951952844, "loss": 1.1807, "step": 7208 }, { "epoch": 0.6321700014660968, "grad_norm": 0.053466796875, "learning_rate": 0.0012455649161689033, "loss": 1.1418, "step": 7209 }, { "epoch": 0.6322576932404714, "grad_norm": 0.06005859375, "learning_rate": 0.001245175974025959, "loss": 1.1395, "step": 7210 }, { "epoch": 0.6323453850148459, "grad_norm": 0.07470703125, "learning_rate": 0.0012447870688019193, "loss": 1.1768, "step": 7211 }, { "epoch": 0.6324330767892204, "grad_norm": 0.0849609375, "learning_rate": 0.0012443982005322534, "loss": 1.2029, "step": 7212 }, { "epoch": 0.6325207685635951, "grad_norm": 0.12158203125, "learning_rate": 0.0012440093692524241, "loss": 1.1369, "step": 7213 }, { "epoch": 0.6326084603379696, "grad_norm": 0.05517578125, "learning_rate": 0.0012436205749978928, "loss": 1.1118, "step": 7214 }, { "epoch": 0.6326961521123441, "grad_norm": 0.09130859375, "learning_rate": 0.0012432318178041157, "loss": 1.1432, "step": 7215 }, { "epoch": 0.6327838438867187, "grad_norm": 0.10595703125, "learning_rate": 0.0012428430977065466, "loss": 1.1466, "step": 7216 }, { "epoch": 0.6328715356610932, "grad_norm": 0.0546875, "learning_rate": 0.0012424544147406364, "loss": 1.1235, "step": 7217 }, { "epoch": 0.6329592274354677, "grad_norm": 0.09814453125, "learning_rate": 0.0012420657689418315, "loss": 1.1734, "step": 7218 }, { "epoch": 0.6330469192098424, "grad_norm": 0.056884765625, "learning_rate": 0.0012416771603455752, "loss": 1.161, "step": 7219 }, { "epoch": 0.6331346109842169, "grad_norm": 0.06396484375, "learning_rate": 0.0012412885889873074, "loss": 1.1711, "step": 7220 }, { "epoch": 0.6332223027585914, "grad_norm": 0.07373046875, "learning_rate": 0.001240900054902466, "loss": 1.1908, "step": 7221 }, { "epoch": 0.633309994532966, "grad_norm": 0.06005859375, "learning_rate": 0.0012405115581264829, "loss": 1.1679, "step": 7222 }, { "epoch": 0.6333976863073405, "grad_norm": 0.052978515625, "learning_rate": 0.0012401230986947885, "loss": 1.167, "step": 7223 }, { "epoch": 0.633485378081715, "grad_norm": 0.056640625, "learning_rate": 0.0012397346766428099, "loss": 1.1412, "step": 7224 }, { "epoch": 0.6335730698560896, "grad_norm": 0.064453125, "learning_rate": 0.0012393462920059696, "loss": 1.2023, "step": 7225 }, { "epoch": 0.6336607616304641, "grad_norm": 0.07470703125, "learning_rate": 0.0012389579448196868, "loss": 1.1979, "step": 7226 }, { "epoch": 0.6337484534048387, "grad_norm": 0.06787109375, "learning_rate": 0.0012385696351193795, "loss": 1.1696, "step": 7227 }, { "epoch": 0.6338361451792132, "grad_norm": 0.06787109375, "learning_rate": 0.0012381813629404586, "loss": 1.1246, "step": 7228 }, { "epoch": 0.6339238369535878, "grad_norm": 0.053466796875, "learning_rate": 0.0012377931283183345, "loss": 1.2294, "step": 7229 }, { "epoch": 0.6340115287279623, "grad_norm": 0.05126953125, "learning_rate": 0.0012374049312884128, "loss": 1.1833, "step": 7230 }, { "epoch": 0.6340992205023368, "grad_norm": 0.05078125, "learning_rate": 0.0012370167718860963, "loss": 1.1366, "step": 7231 }, { "epoch": 0.6341869122767114, "grad_norm": 0.05224609375, "learning_rate": 0.0012366286501467843, "loss": 1.1216, "step": 7232 }, { "epoch": 0.6342746040510859, "grad_norm": 0.052978515625, "learning_rate": 0.0012362405661058718, "loss": 1.2009, "step": 7233 }, { "epoch": 0.6343622958254604, "grad_norm": 0.0478515625, "learning_rate": 0.0012358525197987522, "loss": 1.1993, "step": 7234 }, { "epoch": 0.6344499875998351, "grad_norm": 0.052978515625, "learning_rate": 0.0012354645112608131, "loss": 1.1264, "step": 7235 }, { "epoch": 0.6345376793742096, "grad_norm": 0.055419921875, "learning_rate": 0.0012350765405274406, "loss": 1.1857, "step": 7236 }, { "epoch": 0.6346253711485841, "grad_norm": 0.07177734375, "learning_rate": 0.0012346886076340167, "loss": 1.236, "step": 7237 }, { "epoch": 0.6347130629229587, "grad_norm": 0.0556640625, "learning_rate": 0.0012343007126159196, "loss": 1.2104, "step": 7238 }, { "epoch": 0.6348007546973332, "grad_norm": 0.051513671875, "learning_rate": 0.0012339128555085245, "loss": 1.154, "step": 7239 }, { "epoch": 0.6348884464717077, "grad_norm": 0.051513671875, "learning_rate": 0.0012335250363472024, "loss": 1.1575, "step": 7240 }, { "epoch": 0.6349761382460823, "grad_norm": 0.05029296875, "learning_rate": 0.001233137255167322, "loss": 1.1852, "step": 7241 }, { "epoch": 0.6350638300204569, "grad_norm": 0.05615234375, "learning_rate": 0.001232749512004248, "loss": 1.1658, "step": 7242 }, { "epoch": 0.6351515217948314, "grad_norm": 0.052490234375, "learning_rate": 0.0012323618068933412, "loss": 1.1862, "step": 7243 }, { "epoch": 0.635239213569206, "grad_norm": 0.05419921875, "learning_rate": 0.0012319741398699603, "loss": 1.221, "step": 7244 }, { "epoch": 0.6353269053435805, "grad_norm": 0.080078125, "learning_rate": 0.0012315865109694577, "loss": 1.2021, "step": 7245 }, { "epoch": 0.635414597117955, "grad_norm": 0.0556640625, "learning_rate": 0.0012311989202271859, "loss": 1.23, "step": 7246 }, { "epoch": 0.6355022888923296, "grad_norm": 0.04833984375, "learning_rate": 0.0012308113676784906, "loss": 1.1431, "step": 7247 }, { "epoch": 0.6355899806667041, "grad_norm": 0.06005859375, "learning_rate": 0.001230423853358717, "loss": 1.1881, "step": 7248 }, { "epoch": 0.6356776724410786, "grad_norm": 0.0546875, "learning_rate": 0.0012300363773032048, "loss": 1.2022, "step": 7249 }, { "epoch": 0.6357653642154532, "grad_norm": 0.058349609375, "learning_rate": 0.001229648939547291, "loss": 1.274, "step": 7250 }, { "epoch": 0.6358530559898278, "grad_norm": 0.05078125, "learning_rate": 0.001229261540126308, "loss": 1.2121, "step": 7251 }, { "epoch": 0.6359407477642023, "grad_norm": 0.05224609375, "learning_rate": 0.0012288741790755873, "loss": 1.1892, "step": 7252 }, { "epoch": 0.6360284395385768, "grad_norm": 0.058837890625, "learning_rate": 0.0012284868564304539, "loss": 1.1518, "step": 7253 }, { "epoch": 0.6361161313129514, "grad_norm": 0.05712890625, "learning_rate": 0.0012280995722262313, "loss": 1.1545, "step": 7254 }, { "epoch": 0.6362038230873259, "grad_norm": 0.0537109375, "learning_rate": 0.0012277123264982385, "loss": 1.2153, "step": 7255 }, { "epoch": 0.6362915148617004, "grad_norm": 0.05859375, "learning_rate": 0.0012273251192817913, "loss": 1.2118, "step": 7256 }, { "epoch": 0.6363792066360751, "grad_norm": 0.0693359375, "learning_rate": 0.001226937950612202, "loss": 1.1144, "step": 7257 }, { "epoch": 0.6364668984104496, "grad_norm": 0.0537109375, "learning_rate": 0.0012265508205247797, "loss": 1.24, "step": 7258 }, { "epoch": 0.6365545901848241, "grad_norm": 0.053955078125, "learning_rate": 0.0012261637290548293, "loss": 1.2194, "step": 7259 }, { "epoch": 0.6366422819591987, "grad_norm": 0.059326171875, "learning_rate": 0.0012257766762376527, "loss": 1.1872, "step": 7260 }, { "epoch": 0.6367299737335732, "grad_norm": 0.062255859375, "learning_rate": 0.0012253896621085487, "loss": 1.2282, "step": 7261 }, { "epoch": 0.6368176655079477, "grad_norm": 0.052490234375, "learning_rate": 0.0012250026867028105, "loss": 1.2293, "step": 7262 }, { "epoch": 0.6369053572823223, "grad_norm": 0.052978515625, "learning_rate": 0.0012246157500557307, "loss": 1.1818, "step": 7263 }, { "epoch": 0.6369930490566968, "grad_norm": 0.058837890625, "learning_rate": 0.0012242288522025962, "loss": 1.1724, "step": 7264 }, { "epoch": 0.6370807408310714, "grad_norm": 0.05322265625, "learning_rate": 0.0012238419931786912, "loss": 1.1957, "step": 7265 }, { "epoch": 0.637168432605446, "grad_norm": 0.057373046875, "learning_rate": 0.0012234551730192962, "loss": 1.2193, "step": 7266 }, { "epoch": 0.6372561243798205, "grad_norm": 0.0595703125, "learning_rate": 0.0012230683917596884, "loss": 1.1978, "step": 7267 }, { "epoch": 0.637343816154195, "grad_norm": 0.0908203125, "learning_rate": 0.0012226816494351413, "loss": 1.1995, "step": 7268 }, { "epoch": 0.6374315079285696, "grad_norm": 0.06005859375, "learning_rate": 0.001222294946080924, "loss": 1.1737, "step": 7269 }, { "epoch": 0.6375191997029441, "grad_norm": 0.07373046875, "learning_rate": 0.0012219082817323042, "loss": 1.1851, "step": 7270 }, { "epoch": 0.6376068914773186, "grad_norm": 0.056396484375, "learning_rate": 0.0012215216564245438, "loss": 1.1593, "step": 7271 }, { "epoch": 0.6376945832516933, "grad_norm": 0.047119140625, "learning_rate": 0.0012211350701929024, "loss": 1.2498, "step": 7272 }, { "epoch": 0.6377822750260678, "grad_norm": 0.1025390625, "learning_rate": 0.0012207485230726351, "loss": 1.192, "step": 7273 }, { "epoch": 0.6378699668004423, "grad_norm": 0.061279296875, "learning_rate": 0.001220362015098995, "loss": 1.1403, "step": 7274 }, { "epoch": 0.6379576585748168, "grad_norm": 0.061279296875, "learning_rate": 0.0012199755463072292, "loss": 1.1408, "step": 7275 }, { "epoch": 0.6380453503491914, "grad_norm": 0.06787109375, "learning_rate": 0.0012195891167325844, "loss": 1.1773, "step": 7276 }, { "epoch": 0.6381330421235659, "grad_norm": 0.078125, "learning_rate": 0.0012192027264103001, "loss": 1.2052, "step": 7277 }, { "epoch": 0.6382207338979404, "grad_norm": 0.048828125, "learning_rate": 0.0012188163753756157, "loss": 1.1792, "step": 7278 }, { "epoch": 0.638308425672315, "grad_norm": 0.0576171875, "learning_rate": 0.001218430063663765, "loss": 1.2209, "step": 7279 }, { "epoch": 0.6383961174466896, "grad_norm": 0.0712890625, "learning_rate": 0.001218043791309978, "loss": 1.1568, "step": 7280 }, { "epoch": 0.6384838092210641, "grad_norm": 0.0615234375, "learning_rate": 0.0012176575583494821, "loss": 1.1825, "step": 7281 }, { "epoch": 0.6385715009954387, "grad_norm": 0.05029296875, "learning_rate": 0.0012172713648175007, "loss": 1.2058, "step": 7282 }, { "epoch": 0.6386591927698132, "grad_norm": 0.07666015625, "learning_rate": 0.0012168852107492538, "loss": 1.1702, "step": 7283 }, { "epoch": 0.6387468845441877, "grad_norm": 0.06494140625, "learning_rate": 0.0012164990961799579, "loss": 1.1822, "step": 7284 }, { "epoch": 0.6388345763185623, "grad_norm": 0.052001953125, "learning_rate": 0.001216113021144825, "loss": 1.2835, "step": 7285 }, { "epoch": 0.6389222680929368, "grad_norm": 0.08251953125, "learning_rate": 0.0012157269856790647, "loss": 1.1997, "step": 7286 }, { "epoch": 0.6390099598673114, "grad_norm": 0.05615234375, "learning_rate": 0.0012153409898178823, "loss": 1.1683, "step": 7287 }, { "epoch": 0.639097651641686, "grad_norm": 0.061767578125, "learning_rate": 0.001214955033596479, "loss": 1.1533, "step": 7288 }, { "epoch": 0.6391853434160605, "grad_norm": 0.046875, "learning_rate": 0.001214569117050054, "loss": 1.1359, "step": 7289 }, { "epoch": 0.639273035190435, "grad_norm": 0.052490234375, "learning_rate": 0.0012141832402138017, "loss": 1.1657, "step": 7290 }, { "epoch": 0.6393607269648096, "grad_norm": 0.046142578125, "learning_rate": 0.0012137974031229126, "loss": 1.1372, "step": 7291 }, { "epoch": 0.6394484187391841, "grad_norm": 0.0703125, "learning_rate": 0.001213411605812574, "loss": 1.1505, "step": 7292 }, { "epoch": 0.6395361105135586, "grad_norm": 0.05322265625, "learning_rate": 0.0012130258483179703, "loss": 1.156, "step": 7293 }, { "epoch": 0.6396238022879333, "grad_norm": 0.050537109375, "learning_rate": 0.001212640130674281, "loss": 1.1317, "step": 7294 }, { "epoch": 0.6397114940623078, "grad_norm": 0.0595703125, "learning_rate": 0.0012122544529166832, "loss": 1.1931, "step": 7295 }, { "epoch": 0.6397991858366823, "grad_norm": 0.054931640625, "learning_rate": 0.0012118688150803492, "loss": 1.182, "step": 7296 }, { "epoch": 0.6398868776110568, "grad_norm": 0.050537109375, "learning_rate": 0.0012114832172004477, "loss": 1.1563, "step": 7297 }, { "epoch": 0.6399745693854314, "grad_norm": 0.053466796875, "learning_rate": 0.0012110976593121448, "loss": 1.1367, "step": 7298 }, { "epoch": 0.6400622611598059, "grad_norm": 0.057861328125, "learning_rate": 0.0012107121414506024, "loss": 1.1896, "step": 7299 }, { "epoch": 0.6401499529341804, "grad_norm": 0.0537109375, "learning_rate": 0.0012103266636509789, "loss": 1.1199, "step": 7300 }, { "epoch": 0.640237644708555, "grad_norm": 0.0576171875, "learning_rate": 0.0012099412259484288, "loss": 1.1267, "step": 7301 }, { "epoch": 0.6403253364829296, "grad_norm": 0.06298828125, "learning_rate": 0.0012095558283781022, "loss": 1.2119, "step": 7302 }, { "epoch": 0.6404130282573041, "grad_norm": 0.0439453125, "learning_rate": 0.0012091704709751479, "loss": 1.1522, "step": 7303 }, { "epoch": 0.6405007200316787, "grad_norm": 0.07080078125, "learning_rate": 0.001208785153774708, "loss": 1.1158, "step": 7304 }, { "epoch": 0.6405884118060532, "grad_norm": 0.05908203125, "learning_rate": 0.0012083998768119234, "loss": 1.1841, "step": 7305 }, { "epoch": 0.6406761035804277, "grad_norm": 0.049072265625, "learning_rate": 0.00120801464012193, "loss": 1.1718, "step": 7306 }, { "epoch": 0.6407637953548023, "grad_norm": 0.0693359375, "learning_rate": 0.0012076294437398606, "loss": 1.1986, "step": 7307 }, { "epoch": 0.6408514871291768, "grad_norm": 0.051025390625, "learning_rate": 0.0012072442877008435, "loss": 1.2021, "step": 7308 }, { "epoch": 0.6409391789035513, "grad_norm": 0.053955078125, "learning_rate": 0.0012068591720400049, "loss": 1.2059, "step": 7309 }, { "epoch": 0.641026870677926, "grad_norm": 0.04736328125, "learning_rate": 0.0012064740967924655, "loss": 1.1934, "step": 7310 }, { "epoch": 0.6411145624523005, "grad_norm": 0.059326171875, "learning_rate": 0.0012060890619933437, "loss": 1.2322, "step": 7311 }, { "epoch": 0.641202254226675, "grad_norm": 0.06591796875, "learning_rate": 0.0012057040676777537, "loss": 1.1316, "step": 7312 }, { "epoch": 0.6412899460010496, "grad_norm": 0.060791015625, "learning_rate": 0.0012053191138808057, "loss": 1.1808, "step": 7313 }, { "epoch": 0.6413776377754241, "grad_norm": 0.052490234375, "learning_rate": 0.0012049342006376065, "loss": 1.1714, "step": 7314 }, { "epoch": 0.6414653295497986, "grad_norm": 0.06640625, "learning_rate": 0.001204549327983259, "loss": 1.1824, "step": 7315 }, { "epoch": 0.6415530213241732, "grad_norm": 0.05078125, "learning_rate": 0.001204164495952863, "loss": 1.2326, "step": 7316 }, { "epoch": 0.6416407130985478, "grad_norm": 0.06298828125, "learning_rate": 0.0012037797045815139, "loss": 1.1809, "step": 7317 }, { "epoch": 0.6417284048729223, "grad_norm": 0.08056640625, "learning_rate": 0.0012033949539043045, "loss": 1.2163, "step": 7318 }, { "epoch": 0.6418160966472969, "grad_norm": 0.0888671875, "learning_rate": 0.0012030102439563217, "loss": 1.1505, "step": 7319 }, { "epoch": 0.6419037884216714, "grad_norm": 0.0927734375, "learning_rate": 0.001202625574772651, "loss": 1.1613, "step": 7320 }, { "epoch": 0.6419914801960459, "grad_norm": 0.05322265625, "learning_rate": 0.0012022409463883729, "loss": 1.1982, "step": 7321 }, { "epoch": 0.6420791719704204, "grad_norm": 0.09716796875, "learning_rate": 0.0012018563588385648, "loss": 1.1667, "step": 7322 }, { "epoch": 0.642166863744795, "grad_norm": 0.09814453125, "learning_rate": 0.0012014718121582996, "loss": 1.1697, "step": 7323 }, { "epoch": 0.6422545555191695, "grad_norm": 0.05126953125, "learning_rate": 0.001201087306382647, "loss": 1.2052, "step": 7324 }, { "epoch": 0.642342247293544, "grad_norm": 0.0556640625, "learning_rate": 0.001200702841546674, "loss": 1.1106, "step": 7325 }, { "epoch": 0.6424299390679187, "grad_norm": 0.064453125, "learning_rate": 0.0012003184176854415, "loss": 1.1628, "step": 7326 }, { "epoch": 0.6425176308422932, "grad_norm": 0.0791015625, "learning_rate": 0.0011999340348340084, "loss": 1.2119, "step": 7327 }, { "epoch": 0.6426053226166677, "grad_norm": 0.054443359375, "learning_rate": 0.0011995496930274298, "loss": 1.1223, "step": 7328 }, { "epoch": 0.6426930143910423, "grad_norm": 0.07763671875, "learning_rate": 0.0011991653923007564, "loss": 1.1761, "step": 7329 }, { "epoch": 0.6427807061654168, "grad_norm": 0.0595703125, "learning_rate": 0.0011987811326890356, "loss": 1.2311, "step": 7330 }, { "epoch": 0.6428683979397913, "grad_norm": 0.055419921875, "learning_rate": 0.0011983969142273102, "loss": 1.1843, "step": 7331 }, { "epoch": 0.642956089714166, "grad_norm": 0.06396484375, "learning_rate": 0.0011980127369506206, "loss": 1.1666, "step": 7332 }, { "epoch": 0.6430437814885405, "grad_norm": 0.054931640625, "learning_rate": 0.0011976286008940022, "loss": 1.1653, "step": 7333 }, { "epoch": 0.643131473262915, "grad_norm": 0.064453125, "learning_rate": 0.0011972445060924885, "loss": 1.1805, "step": 7334 }, { "epoch": 0.6432191650372896, "grad_norm": 0.05517578125, "learning_rate": 0.0011968604525811062, "loss": 1.1791, "step": 7335 }, { "epoch": 0.6433068568116641, "grad_norm": 0.05126953125, "learning_rate": 0.001196476440394881, "loss": 1.1466, "step": 7336 }, { "epoch": 0.6433945485860386, "grad_norm": 0.05859375, "learning_rate": 0.0011960924695688338, "loss": 1.2314, "step": 7337 }, { "epoch": 0.6434822403604132, "grad_norm": 0.0517578125, "learning_rate": 0.0011957085401379816, "loss": 1.1569, "step": 7338 }, { "epoch": 0.6435699321347877, "grad_norm": 0.052734375, "learning_rate": 0.001195324652137338, "loss": 1.1534, "step": 7339 }, { "epoch": 0.6436576239091623, "grad_norm": 0.0498046875, "learning_rate": 0.0011949408056019125, "loss": 1.1219, "step": 7340 }, { "epoch": 0.6437453156835369, "grad_norm": 0.0625, "learning_rate": 0.00119455700056671, "loss": 1.1188, "step": 7341 }, { "epoch": 0.6438330074579114, "grad_norm": 0.04638671875, "learning_rate": 0.0011941732370667338, "loss": 1.1419, "step": 7342 }, { "epoch": 0.6439206992322859, "grad_norm": 0.0673828125, "learning_rate": 0.0011937895151369817, "loss": 1.2332, "step": 7343 }, { "epoch": 0.6440083910066605, "grad_norm": 0.0595703125, "learning_rate": 0.0011934058348124484, "loss": 1.2239, "step": 7344 }, { "epoch": 0.644096082781035, "grad_norm": 0.07568359375, "learning_rate": 0.0011930221961281241, "loss": 1.1706, "step": 7345 }, { "epoch": 0.6441837745554095, "grad_norm": 0.054931640625, "learning_rate": 0.001192638599118996, "loss": 1.2002, "step": 7346 }, { "epoch": 0.644271466329784, "grad_norm": 0.056884765625, "learning_rate": 0.0011922550438200465, "loss": 1.2201, "step": 7347 }, { "epoch": 0.6443591581041587, "grad_norm": 0.0791015625, "learning_rate": 0.0011918715302662555, "loss": 1.2123, "step": 7348 }, { "epoch": 0.6444468498785332, "grad_norm": 0.054443359375, "learning_rate": 0.0011914880584925984, "loss": 1.2124, "step": 7349 }, { "epoch": 0.6445345416529077, "grad_norm": 0.047119140625, "learning_rate": 0.0011911046285340468, "loss": 1.2123, "step": 7350 }, { "epoch": 0.6446222334272823, "grad_norm": 0.0517578125, "learning_rate": 0.0011907212404255682, "loss": 1.1645, "step": 7351 }, { "epoch": 0.6447099252016568, "grad_norm": 0.045654296875, "learning_rate": 0.001190337894202127, "loss": 1.1292, "step": 7352 }, { "epoch": 0.6447976169760313, "grad_norm": 0.054931640625, "learning_rate": 0.001189954589898683, "loss": 1.1467, "step": 7353 }, { "epoch": 0.644885308750406, "grad_norm": 0.049072265625, "learning_rate": 0.001189571327550193, "loss": 1.2117, "step": 7354 }, { "epoch": 0.6449730005247805, "grad_norm": 0.05126953125, "learning_rate": 0.0011891881071916092, "loss": 1.2332, "step": 7355 }, { "epoch": 0.645060692299155, "grad_norm": 0.05859375, "learning_rate": 0.001188804928857881, "loss": 1.1835, "step": 7356 }, { "epoch": 0.6451483840735296, "grad_norm": 0.08203125, "learning_rate": 0.0011884217925839519, "loss": 1.2221, "step": 7357 }, { "epoch": 0.6452360758479041, "grad_norm": 0.0517578125, "learning_rate": 0.0011880386984047643, "loss": 1.1804, "step": 7358 }, { "epoch": 0.6453237676222786, "grad_norm": 0.08447265625, "learning_rate": 0.0011876556463552544, "loss": 1.1873, "step": 7359 }, { "epoch": 0.6454114593966532, "grad_norm": 0.06298828125, "learning_rate": 0.0011872726364703567, "loss": 1.1939, "step": 7360 }, { "epoch": 0.6454991511710277, "grad_norm": 0.072265625, "learning_rate": 0.0011868896687849994, "loss": 1.1633, "step": 7361 }, { "epoch": 0.6455868429454023, "grad_norm": 0.09130859375, "learning_rate": 0.0011865067433341093, "loss": 1.1577, "step": 7362 }, { "epoch": 0.6456745347197769, "grad_norm": 0.09619140625, "learning_rate": 0.001186123860152608, "loss": 1.209, "step": 7363 }, { "epoch": 0.6457622264941514, "grad_norm": 0.05810546875, "learning_rate": 0.0011857410192754124, "loss": 1.1794, "step": 7364 }, { "epoch": 0.6458499182685259, "grad_norm": 0.08349609375, "learning_rate": 0.0011853582207374375, "loss": 1.2, "step": 7365 }, { "epoch": 0.6459376100429005, "grad_norm": 0.09130859375, "learning_rate": 0.0011849754645735936, "loss": 1.1296, "step": 7366 }, { "epoch": 0.646025301817275, "grad_norm": 0.0908203125, "learning_rate": 0.0011845927508187869, "loss": 1.1876, "step": 7367 }, { "epoch": 0.6461129935916495, "grad_norm": 0.05810546875, "learning_rate": 0.0011842100795079196, "loss": 1.1452, "step": 7368 }, { "epoch": 0.646200685366024, "grad_norm": 0.10546875, "learning_rate": 0.0011838274506758906, "loss": 1.2242, "step": 7369 }, { "epoch": 0.6462883771403987, "grad_norm": 0.055908203125, "learning_rate": 0.001183444864357595, "loss": 1.1346, "step": 7370 }, { "epoch": 0.6463760689147732, "grad_norm": 0.046630859375, "learning_rate": 0.001183062320587923, "loss": 1.1248, "step": 7371 }, { "epoch": 0.6464637606891477, "grad_norm": 0.050537109375, "learning_rate": 0.0011826798194017622, "loss": 1.1631, "step": 7372 }, { "epoch": 0.6465514524635223, "grad_norm": 0.05712890625, "learning_rate": 0.0011822973608339956, "loss": 1.1727, "step": 7373 }, { "epoch": 0.6466391442378968, "grad_norm": 0.05224609375, "learning_rate": 0.001181914944919502, "loss": 1.1563, "step": 7374 }, { "epoch": 0.6467268360122713, "grad_norm": 0.04833984375, "learning_rate": 0.0011815325716931573, "loss": 1.1209, "step": 7375 }, { "epoch": 0.6468145277866459, "grad_norm": 0.051513671875, "learning_rate": 0.001181150241189833, "loss": 1.2102, "step": 7376 }, { "epoch": 0.6469022195610205, "grad_norm": 0.068359375, "learning_rate": 0.0011807679534443956, "loss": 1.2227, "step": 7377 }, { "epoch": 0.646989911335395, "grad_norm": 0.052001953125, "learning_rate": 0.0011803857084917101, "loss": 1.1377, "step": 7378 }, { "epoch": 0.6470776031097696, "grad_norm": 0.04931640625, "learning_rate": 0.0011800035063666353, "loss": 1.1745, "step": 7379 }, { "epoch": 0.6471652948841441, "grad_norm": 0.056396484375, "learning_rate": 0.001179621347104028, "loss": 1.1802, "step": 7380 }, { "epoch": 0.6472529866585186, "grad_norm": 0.04931640625, "learning_rate": 0.0011792392307387396, "loss": 1.2016, "step": 7381 }, { "epoch": 0.6473406784328932, "grad_norm": 0.060302734375, "learning_rate": 0.001178857157305618, "loss": 1.1789, "step": 7382 }, { "epoch": 0.6474283702072677, "grad_norm": 0.05029296875, "learning_rate": 0.0011784751268395068, "loss": 1.1403, "step": 7383 }, { "epoch": 0.6475160619816422, "grad_norm": 0.06787109375, "learning_rate": 0.0011780931393752473, "loss": 1.1552, "step": 7384 }, { "epoch": 0.6476037537560169, "grad_norm": 0.045654296875, "learning_rate": 0.001177711194947675, "loss": 1.1272, "step": 7385 }, { "epoch": 0.6476914455303914, "grad_norm": 0.050048828125, "learning_rate": 0.0011773292935916228, "loss": 1.1466, "step": 7386 }, { "epoch": 0.6477791373047659, "grad_norm": 0.046875, "learning_rate": 0.0011769474353419188, "loss": 1.1695, "step": 7387 }, { "epoch": 0.6478668290791405, "grad_norm": 0.08642578125, "learning_rate": 0.001176565620233387, "loss": 1.1394, "step": 7388 }, { "epoch": 0.647954520853515, "grad_norm": 0.0703125, "learning_rate": 0.001176183848300849, "loss": 1.15, "step": 7389 }, { "epoch": 0.6480422126278895, "grad_norm": 0.054931640625, "learning_rate": 0.0011758021195791206, "loss": 1.1713, "step": 7390 }, { "epoch": 0.6481299044022641, "grad_norm": 0.049560546875, "learning_rate": 0.0011754204341030152, "loss": 1.18, "step": 7391 }, { "epoch": 0.6482175961766387, "grad_norm": 0.07177734375, "learning_rate": 0.0011750387919073405, "loss": 1.1912, "step": 7392 }, { "epoch": 0.6483052879510132, "grad_norm": 0.047607421875, "learning_rate": 0.0011746571930269024, "loss": 1.1346, "step": 7393 }, { "epoch": 0.6483929797253877, "grad_norm": 0.053955078125, "learning_rate": 0.0011742756374965007, "loss": 1.1431, "step": 7394 }, { "epoch": 0.6484806714997623, "grad_norm": 0.06689453125, "learning_rate": 0.0011738941253509334, "loss": 1.1408, "step": 7395 }, { "epoch": 0.6485683632741368, "grad_norm": 0.060791015625, "learning_rate": 0.0011735126566249927, "loss": 1.1587, "step": 7396 }, { "epoch": 0.6486560550485113, "grad_norm": 0.05615234375, "learning_rate": 0.0011731312313534674, "loss": 1.2242, "step": 7397 }, { "epoch": 0.6487437468228859, "grad_norm": 0.09375, "learning_rate": 0.0011727498495711427, "loss": 1.2343, "step": 7398 }, { "epoch": 0.6488314385972604, "grad_norm": 0.08349609375, "learning_rate": 0.0011723685113127998, "loss": 1.1998, "step": 7399 }, { "epoch": 0.648919130371635, "grad_norm": 0.1103515625, "learning_rate": 0.0011719872166132156, "loss": 1.201, "step": 7400 }, { "epoch": 0.6490068221460096, "grad_norm": 0.08349609375, "learning_rate": 0.0011716059655071634, "loss": 1.147, "step": 7401 }, { "epoch": 0.6490945139203841, "grad_norm": 0.05908203125, "learning_rate": 0.0011712247580294122, "loss": 1.1857, "step": 7402 }, { "epoch": 0.6491822056947586, "grad_norm": 0.080078125, "learning_rate": 0.001170843594214727, "loss": 1.135, "step": 7403 }, { "epoch": 0.6492698974691332, "grad_norm": 0.09326171875, "learning_rate": 0.0011704624740978694, "loss": 1.1966, "step": 7404 }, { "epoch": 0.6493575892435077, "grad_norm": 0.06494140625, "learning_rate": 0.0011700813977135955, "loss": 1.2206, "step": 7405 }, { "epoch": 0.6494452810178822, "grad_norm": 0.056884765625, "learning_rate": 0.0011697003650966596, "loss": 1.165, "step": 7406 }, { "epoch": 0.6495329727922569, "grad_norm": 0.06787109375, "learning_rate": 0.0011693193762818109, "loss": 1.1282, "step": 7407 }, { "epoch": 0.6496206645666314, "grad_norm": 0.060791015625, "learning_rate": 0.001168938431303794, "loss": 1.1888, "step": 7408 }, { "epoch": 0.6497083563410059, "grad_norm": 0.047607421875, "learning_rate": 0.00116855753019735, "loss": 1.1684, "step": 7409 }, { "epoch": 0.6497960481153805, "grad_norm": 0.0576171875, "learning_rate": 0.0011681766729972165, "loss": 1.1866, "step": 7410 }, { "epoch": 0.649883739889755, "grad_norm": 0.0869140625, "learning_rate": 0.001167795859738127, "loss": 1.197, "step": 7411 }, { "epoch": 0.6499714316641295, "grad_norm": 0.0537109375, "learning_rate": 0.0011674150904548099, "loss": 1.185, "step": 7412 }, { "epoch": 0.6500591234385041, "grad_norm": 0.051513671875, "learning_rate": 0.0011670343651819912, "loss": 1.1928, "step": 7413 }, { "epoch": 0.6501468152128786, "grad_norm": 0.08740234375, "learning_rate": 0.0011666536839543913, "loss": 1.235, "step": 7414 }, { "epoch": 0.6502345069872532, "grad_norm": 0.0498046875, "learning_rate": 0.0011662730468067271, "loss": 1.2051, "step": 7415 }, { "epoch": 0.6503221987616278, "grad_norm": 0.0517578125, "learning_rate": 0.0011658924537737132, "loss": 1.1755, "step": 7416 }, { "epoch": 0.6504098905360023, "grad_norm": 0.048828125, "learning_rate": 0.0011655119048900574, "loss": 1.1575, "step": 7417 }, { "epoch": 0.6504975823103768, "grad_norm": 0.053466796875, "learning_rate": 0.0011651314001904654, "loss": 1.1717, "step": 7418 }, { "epoch": 0.6505852740847513, "grad_norm": 0.0546875, "learning_rate": 0.0011647509397096378, "loss": 1.2186, "step": 7419 }, { "epoch": 0.6506729658591259, "grad_norm": 0.0673828125, "learning_rate": 0.001164370523482272, "loss": 1.2068, "step": 7420 }, { "epoch": 0.6507606576335004, "grad_norm": 0.052734375, "learning_rate": 0.0011639901515430604, "loss": 1.1998, "step": 7421 }, { "epoch": 0.650848349407875, "grad_norm": 0.05029296875, "learning_rate": 0.0011636098239266928, "loss": 1.1875, "step": 7422 }, { "epoch": 0.6509360411822496, "grad_norm": 0.046630859375, "learning_rate": 0.0011632295406678534, "loss": 1.1396, "step": 7423 }, { "epoch": 0.6510237329566241, "grad_norm": 0.049072265625, "learning_rate": 0.0011628493018012236, "loss": 1.1705, "step": 7424 }, { "epoch": 0.6511114247309986, "grad_norm": 0.0537109375, "learning_rate": 0.0011624691073614797, "loss": 1.1463, "step": 7425 }, { "epoch": 0.6511991165053732, "grad_norm": 0.05078125, "learning_rate": 0.001162088957383295, "loss": 1.2037, "step": 7426 }, { "epoch": 0.6512868082797477, "grad_norm": 0.061767578125, "learning_rate": 0.0011617088519013368, "loss": 1.214, "step": 7427 }, { "epoch": 0.6513745000541222, "grad_norm": 0.058837890625, "learning_rate": 0.001161328790950272, "loss": 1.1492, "step": 7428 }, { "epoch": 0.6514621918284969, "grad_norm": 0.06494140625, "learning_rate": 0.001160948774564759, "loss": 1.1988, "step": 7429 }, { "epoch": 0.6515498836028714, "grad_norm": 0.06640625, "learning_rate": 0.0011605688027794563, "loss": 1.1679, "step": 7430 }, { "epoch": 0.6516375753772459, "grad_norm": 0.0595703125, "learning_rate": 0.0011601888756290152, "loss": 1.1832, "step": 7431 }, { "epoch": 0.6517252671516205, "grad_norm": 0.07568359375, "learning_rate": 0.0011598089931480833, "loss": 1.2519, "step": 7432 }, { "epoch": 0.651812958925995, "grad_norm": 0.0634765625, "learning_rate": 0.0011594291553713065, "loss": 1.1699, "step": 7433 }, { "epoch": 0.6519006507003695, "grad_norm": 0.06005859375, "learning_rate": 0.0011590493623333238, "loss": 1.1154, "step": 7434 }, { "epoch": 0.6519883424747441, "grad_norm": 0.07080078125, "learning_rate": 0.0011586696140687722, "loss": 1.2054, "step": 7435 }, { "epoch": 0.6520760342491186, "grad_norm": 0.07666015625, "learning_rate": 0.0011582899106122833, "loss": 1.2025, "step": 7436 }, { "epoch": 0.6521637260234932, "grad_norm": 0.06494140625, "learning_rate": 0.0011579102519984855, "loss": 1.1756, "step": 7437 }, { "epoch": 0.6522514177978678, "grad_norm": 0.0634765625, "learning_rate": 0.0011575306382620022, "loss": 1.2193, "step": 7438 }, { "epoch": 0.6523391095722423, "grad_norm": 0.07275390625, "learning_rate": 0.0011571510694374533, "loss": 1.1915, "step": 7439 }, { "epoch": 0.6524268013466168, "grad_norm": 0.0849609375, "learning_rate": 0.0011567715455594548, "loss": 1.1798, "step": 7440 }, { "epoch": 0.6525144931209913, "grad_norm": 0.0703125, "learning_rate": 0.001156392066662618, "loss": 1.1597, "step": 7441 }, { "epoch": 0.6526021848953659, "grad_norm": 0.10009765625, "learning_rate": 0.0011560126327815503, "loss": 1.1977, "step": 7442 }, { "epoch": 0.6526898766697404, "grad_norm": 0.068359375, "learning_rate": 0.0011556332439508555, "loss": 1.2144, "step": 7443 }, { "epoch": 0.6527775684441149, "grad_norm": 0.057373046875, "learning_rate": 0.0011552539002051326, "loss": 1.127, "step": 7444 }, { "epoch": 0.6528652602184896, "grad_norm": 0.06884765625, "learning_rate": 0.0011548746015789767, "loss": 1.1694, "step": 7445 }, { "epoch": 0.6529529519928641, "grad_norm": 0.09423828125, "learning_rate": 0.0011544953481069794, "loss": 1.1329, "step": 7446 }, { "epoch": 0.6530406437672386, "grad_norm": 0.05078125, "learning_rate": 0.001154116139823727, "loss": 1.192, "step": 7447 }, { "epoch": 0.6531283355416132, "grad_norm": 0.0546875, "learning_rate": 0.0011537369767638025, "loss": 1.1853, "step": 7448 }, { "epoch": 0.6532160273159877, "grad_norm": 0.056396484375, "learning_rate": 0.0011533578589617845, "loss": 1.1693, "step": 7449 }, { "epoch": 0.6533037190903622, "grad_norm": 0.06787109375, "learning_rate": 0.0011529787864522475, "loss": 1.1986, "step": 7450 }, { "epoch": 0.6533914108647368, "grad_norm": 0.05517578125, "learning_rate": 0.0011525997592697625, "loss": 1.1573, "step": 7451 }, { "epoch": 0.6534791026391114, "grad_norm": 0.0498046875, "learning_rate": 0.001152220777448895, "loss": 1.1604, "step": 7452 }, { "epoch": 0.6535667944134859, "grad_norm": 0.06884765625, "learning_rate": 0.001151841841024208, "loss": 1.1613, "step": 7453 }, { "epoch": 0.6536544861878605, "grad_norm": 0.06103515625, "learning_rate": 0.001151462950030259, "loss": 1.1517, "step": 7454 }, { "epoch": 0.653742177962235, "grad_norm": 0.051513671875, "learning_rate": 0.001151084104501602, "loss": 1.1751, "step": 7455 }, { "epoch": 0.6538298697366095, "grad_norm": 0.059326171875, "learning_rate": 0.0011507053044727865, "loss": 1.1921, "step": 7456 }, { "epoch": 0.6539175615109841, "grad_norm": 0.0810546875, "learning_rate": 0.0011503265499783588, "loss": 1.2977, "step": 7457 }, { "epoch": 0.6540052532853586, "grad_norm": 0.04931640625, "learning_rate": 0.0011499478410528592, "loss": 1.178, "step": 7458 }, { "epoch": 0.6540929450597331, "grad_norm": 0.053466796875, "learning_rate": 0.001149569177730826, "loss": 1.1726, "step": 7459 }, { "epoch": 0.6541806368341078, "grad_norm": 0.050048828125, "learning_rate": 0.001149190560046792, "loss": 1.1667, "step": 7460 }, { "epoch": 0.6542683286084823, "grad_norm": 0.0751953125, "learning_rate": 0.0011488119880352858, "loss": 1.1653, "step": 7461 }, { "epoch": 0.6543560203828568, "grad_norm": 0.06884765625, "learning_rate": 0.0011484334617308325, "loss": 1.1117, "step": 7462 }, { "epoch": 0.6544437121572314, "grad_norm": 0.068359375, "learning_rate": 0.001148054981167953, "loss": 1.1865, "step": 7463 }, { "epoch": 0.6545314039316059, "grad_norm": 0.0771484375, "learning_rate": 0.0011476765463811634, "loss": 1.1113, "step": 7464 }, { "epoch": 0.6546190957059804, "grad_norm": 0.11669921875, "learning_rate": 0.0011472981574049764, "loss": 1.2389, "step": 7465 }, { "epoch": 0.6547067874803549, "grad_norm": 0.0595703125, "learning_rate": 0.001146919814273899, "loss": 1.1514, "step": 7466 }, { "epoch": 0.6547944792547296, "grad_norm": 0.115234375, "learning_rate": 0.0011465415170224362, "loss": 1.1432, "step": 7467 }, { "epoch": 0.6548821710291041, "grad_norm": 0.10498046875, "learning_rate": 0.0011461632656850872, "loss": 1.1856, "step": 7468 }, { "epoch": 0.6549698628034786, "grad_norm": 0.06591796875, "learning_rate": 0.0011457850602963479, "loss": 1.1799, "step": 7469 }, { "epoch": 0.6550575545778532, "grad_norm": 0.07470703125, "learning_rate": 0.0011454069008907093, "loss": 1.1201, "step": 7470 }, { "epoch": 0.6551452463522277, "grad_norm": 0.10986328125, "learning_rate": 0.0011450287875026592, "loss": 1.1505, "step": 7471 }, { "epoch": 0.6552329381266022, "grad_norm": 0.125, "learning_rate": 0.0011446507201666792, "loss": 1.1851, "step": 7472 }, { "epoch": 0.6553206299009768, "grad_norm": 0.06884765625, "learning_rate": 0.0011442726989172498, "loss": 1.1489, "step": 7473 }, { "epoch": 0.6554083216753513, "grad_norm": 0.09912109375, "learning_rate": 0.0011438947237888443, "loss": 1.1275, "step": 7474 }, { "epoch": 0.6554960134497259, "grad_norm": 0.119140625, "learning_rate": 0.0011435167948159336, "loss": 1.1489, "step": 7475 }, { "epoch": 0.6555837052241005, "grad_norm": 0.06591796875, "learning_rate": 0.0011431389120329838, "loss": 1.1456, "step": 7476 }, { "epoch": 0.655671396998475, "grad_norm": 0.076171875, "learning_rate": 0.0011427610754744567, "loss": 1.232, "step": 7477 }, { "epoch": 0.6557590887728495, "grad_norm": 0.125, "learning_rate": 0.0011423832851748098, "loss": 1.1117, "step": 7478 }, { "epoch": 0.6558467805472241, "grad_norm": 0.06689453125, "learning_rate": 0.0011420055411684976, "loss": 1.1342, "step": 7479 }, { "epoch": 0.6559344723215986, "grad_norm": 0.054931640625, "learning_rate": 0.0011416278434899679, "loss": 1.1098, "step": 7480 }, { "epoch": 0.6560221640959731, "grad_norm": 0.06982421875, "learning_rate": 0.0011412501921736673, "loss": 1.181, "step": 7481 }, { "epoch": 0.6561098558703478, "grad_norm": 0.0546875, "learning_rate": 0.0011408725872540354, "loss": 1.1896, "step": 7482 }, { "epoch": 0.6561975476447223, "grad_norm": 0.056640625, "learning_rate": 0.0011404950287655093, "loss": 1.1742, "step": 7483 }, { "epoch": 0.6562852394190968, "grad_norm": 0.078125, "learning_rate": 0.0011401175167425216, "loss": 1.2006, "step": 7484 }, { "epoch": 0.6563729311934714, "grad_norm": 0.05419921875, "learning_rate": 0.0011397400512194993, "loss": 1.1255, "step": 7485 }, { "epoch": 0.6564606229678459, "grad_norm": 0.0498046875, "learning_rate": 0.0011393626322308674, "loss": 1.1637, "step": 7486 }, { "epoch": 0.6565483147422204, "grad_norm": 0.056640625, "learning_rate": 0.0011389852598110454, "loss": 1.12, "step": 7487 }, { "epoch": 0.6566360065165949, "grad_norm": 0.0673828125, "learning_rate": 0.0011386079339944487, "loss": 1.1633, "step": 7488 }, { "epoch": 0.6567236982909695, "grad_norm": 0.052490234375, "learning_rate": 0.0011382306548154883, "loss": 1.0856, "step": 7489 }, { "epoch": 0.6568113900653441, "grad_norm": 0.08984375, "learning_rate": 0.0011378534223085706, "loss": 1.1919, "step": 7490 }, { "epoch": 0.6568990818397186, "grad_norm": 0.06201171875, "learning_rate": 0.001137476236508099, "loss": 1.1802, "step": 7491 }, { "epoch": 0.6569867736140932, "grad_norm": 0.07666015625, "learning_rate": 0.001137099097448471, "loss": 1.2084, "step": 7492 }, { "epoch": 0.6570744653884677, "grad_norm": 0.06689453125, "learning_rate": 0.0011367220051640822, "loss": 1.2107, "step": 7493 }, { "epoch": 0.6571621571628422, "grad_norm": 0.060302734375, "learning_rate": 0.001136344959689321, "loss": 1.1959, "step": 7494 }, { "epoch": 0.6572498489372168, "grad_norm": 0.0595703125, "learning_rate": 0.0011359679610585735, "loss": 1.1742, "step": 7495 }, { "epoch": 0.6573375407115913, "grad_norm": 0.05126953125, "learning_rate": 0.001135591009306221, "loss": 1.2141, "step": 7496 }, { "epoch": 0.6574252324859658, "grad_norm": 0.053955078125, "learning_rate": 0.0011352141044666405, "loss": 1.1248, "step": 7497 }, { "epoch": 0.6575129242603405, "grad_norm": 0.062255859375, "learning_rate": 0.0011348372465742051, "loss": 1.1449, "step": 7498 }, { "epoch": 0.657600616034715, "grad_norm": 0.0634765625, "learning_rate": 0.001134460435663283, "loss": 1.2216, "step": 7499 }, { "epoch": 0.6576883078090895, "grad_norm": 0.08203125, "learning_rate": 0.0011340836717682377, "loss": 1.2056, "step": 7500 }, { "epoch": 0.6576883078090895, "eval_loss": 1.187709927558899, "eval_runtime": 429.1557, "eval_samples_per_second": 33.664, "eval_steps_per_second": 8.417, "step": 7500 }, { "epoch": 0.6577759995834641, "grad_norm": 0.06298828125, "learning_rate": 0.00113370695492343, "loss": 1.1435, "step": 7501 }, { "epoch": 0.6578636913578386, "grad_norm": 0.08544921875, "learning_rate": 0.001133330285163215, "loss": 1.181, "step": 7502 }, { "epoch": 0.6579513831322131, "grad_norm": 0.054931640625, "learning_rate": 0.0011329536625219441, "loss": 1.1615, "step": 7503 }, { "epoch": 0.6580390749065878, "grad_norm": 0.0859375, "learning_rate": 0.001132577087033965, "loss": 1.1845, "step": 7504 }, { "epoch": 0.6581267666809623, "grad_norm": 0.0849609375, "learning_rate": 0.0011322005587336196, "loss": 1.1782, "step": 7505 }, { "epoch": 0.6582144584553368, "grad_norm": 0.06396484375, "learning_rate": 0.0011318240776552466, "loss": 1.1459, "step": 7506 }, { "epoch": 0.6583021502297114, "grad_norm": 0.07958984375, "learning_rate": 0.0011314476438331797, "loss": 1.2108, "step": 7507 }, { "epoch": 0.6583898420040859, "grad_norm": 0.10986328125, "learning_rate": 0.0011310712573017496, "loss": 1.1499, "step": 7508 }, { "epoch": 0.6584775337784604, "grad_norm": 0.0712890625, "learning_rate": 0.0011306949180952806, "loss": 1.1394, "step": 7509 }, { "epoch": 0.658565225552835, "grad_norm": 0.058837890625, "learning_rate": 0.001130318626248095, "loss": 1.2221, "step": 7510 }, { "epoch": 0.6586529173272095, "grad_norm": 0.0732421875, "learning_rate": 0.0011299423817945087, "loss": 1.1375, "step": 7511 }, { "epoch": 0.658740609101584, "grad_norm": 0.07470703125, "learning_rate": 0.0011295661847688346, "loss": 1.178, "step": 7512 }, { "epoch": 0.6588283008759586, "grad_norm": 0.048828125, "learning_rate": 0.0011291900352053815, "loss": 1.2271, "step": 7513 }, { "epoch": 0.6589159926503332, "grad_norm": 0.076171875, "learning_rate": 0.0011288139331384526, "loss": 1.1632, "step": 7514 }, { "epoch": 0.6590036844247077, "grad_norm": 0.064453125, "learning_rate": 0.0011284378786023477, "loss": 1.1486, "step": 7515 }, { "epoch": 0.6590913761990822, "grad_norm": 0.06298828125, "learning_rate": 0.0011280618716313616, "loss": 1.157, "step": 7516 }, { "epoch": 0.6591790679734568, "grad_norm": 0.07080078125, "learning_rate": 0.0011276859122597853, "loss": 1.1971, "step": 7517 }, { "epoch": 0.6592667597478313, "grad_norm": 0.0673828125, "learning_rate": 0.0011273100005219057, "loss": 1.1419, "step": 7518 }, { "epoch": 0.6593544515222058, "grad_norm": 0.0498046875, "learning_rate": 0.0011269341364520047, "loss": 1.1849, "step": 7519 }, { "epoch": 0.6594421432965805, "grad_norm": 0.057373046875, "learning_rate": 0.00112655832008436, "loss": 1.174, "step": 7520 }, { "epoch": 0.659529835070955, "grad_norm": 0.05908203125, "learning_rate": 0.0011261825514532454, "loss": 1.1583, "step": 7521 }, { "epoch": 0.6596175268453295, "grad_norm": 0.0498046875, "learning_rate": 0.00112580683059293, "loss": 1.1534, "step": 7522 }, { "epoch": 0.6597052186197041, "grad_norm": 0.047607421875, "learning_rate": 0.0011254311575376779, "loss": 1.1625, "step": 7523 }, { "epoch": 0.6597929103940786, "grad_norm": 0.06103515625, "learning_rate": 0.0011250555323217507, "loss": 1.1464, "step": 7524 }, { "epoch": 0.6598806021684531, "grad_norm": 0.07470703125, "learning_rate": 0.0011246799549794032, "loss": 1.1911, "step": 7525 }, { "epoch": 0.6599682939428277, "grad_norm": 0.056640625, "learning_rate": 0.0011243044255448884, "loss": 1.1818, "step": 7526 }, { "epoch": 0.6600559857172023, "grad_norm": 0.048583984375, "learning_rate": 0.0011239289440524523, "loss": 1.2182, "step": 7527 }, { "epoch": 0.6601436774915768, "grad_norm": 0.07666015625, "learning_rate": 0.001123553510536339, "loss": 1.1735, "step": 7528 }, { "epoch": 0.6602313692659514, "grad_norm": 0.080078125, "learning_rate": 0.001123178125030786, "loss": 1.1408, "step": 7529 }, { "epoch": 0.6603190610403259, "grad_norm": 0.064453125, "learning_rate": 0.0011228027875700285, "loss": 1.172, "step": 7530 }, { "epoch": 0.6604067528147004, "grad_norm": 0.072265625, "learning_rate": 0.0011224274981882954, "loss": 1.1981, "step": 7531 }, { "epoch": 0.660494444589075, "grad_norm": 0.107421875, "learning_rate": 0.0011220522569198135, "loss": 1.2196, "step": 7532 }, { "epoch": 0.6605821363634495, "grad_norm": 0.08935546875, "learning_rate": 0.0011216770637988017, "loss": 1.1581, "step": 7533 }, { "epoch": 0.660669828137824, "grad_norm": 0.049072265625, "learning_rate": 0.001121301918859478, "loss": 1.1633, "step": 7534 }, { "epoch": 0.6607575199121987, "grad_norm": 0.1005859375, "learning_rate": 0.001120926822136055, "loss": 1.1928, "step": 7535 }, { "epoch": 0.6608452116865732, "grad_norm": 0.07421875, "learning_rate": 0.0011205517736627398, "loss": 1.1636, "step": 7536 }, { "epoch": 0.6609329034609477, "grad_norm": 0.05908203125, "learning_rate": 0.0011201767734737359, "loss": 1.1398, "step": 7537 }, { "epoch": 0.6610205952353222, "grad_norm": 0.064453125, "learning_rate": 0.0011198018216032428, "loss": 1.1454, "step": 7538 }, { "epoch": 0.6611082870096968, "grad_norm": 0.0849609375, "learning_rate": 0.0011194269180854546, "loss": 1.1199, "step": 7539 }, { "epoch": 0.6611959787840713, "grad_norm": 0.06689453125, "learning_rate": 0.0011190520629545619, "loss": 1.1769, "step": 7540 }, { "epoch": 0.6612836705584458, "grad_norm": 0.052734375, "learning_rate": 0.0011186772562447502, "loss": 1.175, "step": 7541 }, { "epoch": 0.6613713623328205, "grad_norm": 0.059814453125, "learning_rate": 0.0011183024979902014, "loss": 1.2081, "step": 7542 }, { "epoch": 0.661459054107195, "grad_norm": 0.07080078125, "learning_rate": 0.001117927788225092, "loss": 1.1825, "step": 7543 }, { "epoch": 0.6615467458815695, "grad_norm": 0.0791015625, "learning_rate": 0.0011175531269835952, "loss": 1.2399, "step": 7544 }, { "epoch": 0.6616344376559441, "grad_norm": 0.051025390625, "learning_rate": 0.0011171785142998784, "loss": 1.1632, "step": 7545 }, { "epoch": 0.6617221294303186, "grad_norm": 0.0517578125, "learning_rate": 0.0011168039502081055, "loss": 1.1534, "step": 7546 }, { "epoch": 0.6618098212046931, "grad_norm": 0.061767578125, "learning_rate": 0.001116429434742436, "loss": 1.2176, "step": 7547 }, { "epoch": 0.6618975129790677, "grad_norm": 0.05029296875, "learning_rate": 0.001116054967937025, "loss": 1.1432, "step": 7548 }, { "epoch": 0.6619852047534422, "grad_norm": 0.045166015625, "learning_rate": 0.0011156805498260226, "loss": 1.1557, "step": 7549 }, { "epoch": 0.6620728965278168, "grad_norm": 0.055908203125, "learning_rate": 0.0011153061804435743, "loss": 1.1761, "step": 7550 }, { "epoch": 0.6621605883021914, "grad_norm": 0.056640625, "learning_rate": 0.0011149318598238226, "loss": 1.1451, "step": 7551 }, { "epoch": 0.6622482800765659, "grad_norm": 0.052490234375, "learning_rate": 0.0011145575880009034, "loss": 1.2401, "step": 7552 }, { "epoch": 0.6623359718509404, "grad_norm": 0.055908203125, "learning_rate": 0.0011141833650089499, "loss": 1.1484, "step": 7553 }, { "epoch": 0.662423663625315, "grad_norm": 0.056640625, "learning_rate": 0.0011138091908820906, "loss": 1.2005, "step": 7554 }, { "epoch": 0.6625113553996895, "grad_norm": 0.046142578125, "learning_rate": 0.001113435065654449, "loss": 1.162, "step": 7555 }, { "epoch": 0.662599047174064, "grad_norm": 0.05810546875, "learning_rate": 0.0011130609893601439, "loss": 1.1325, "step": 7556 }, { "epoch": 0.6626867389484387, "grad_norm": 0.053466796875, "learning_rate": 0.0011126869620332908, "loss": 1.1745, "step": 7557 }, { "epoch": 0.6627744307228132, "grad_norm": 0.0439453125, "learning_rate": 0.0011123129837079992, "loss": 1.1701, "step": 7558 }, { "epoch": 0.6628621224971877, "grad_norm": 0.06396484375, "learning_rate": 0.001111939054418376, "loss": 1.2178, "step": 7559 }, { "epoch": 0.6629498142715622, "grad_norm": 0.0615234375, "learning_rate": 0.0011115651741985218, "loss": 1.1807, "step": 7560 }, { "epoch": 0.6630375060459368, "grad_norm": 0.057373046875, "learning_rate": 0.001111191343082534, "loss": 1.1921, "step": 7561 }, { "epoch": 0.6631251978203113, "grad_norm": 0.048828125, "learning_rate": 0.0011108175611045045, "loss": 1.1905, "step": 7562 }, { "epoch": 0.6632128895946858, "grad_norm": 0.0712890625, "learning_rate": 0.0011104438282985218, "loss": 1.1684, "step": 7563 }, { "epoch": 0.6633005813690604, "grad_norm": 0.056884765625, "learning_rate": 0.001110070144698669, "loss": 1.1485, "step": 7564 }, { "epoch": 0.663388273143435, "grad_norm": 0.054443359375, "learning_rate": 0.0011096965103390246, "loss": 1.188, "step": 7565 }, { "epoch": 0.6634759649178095, "grad_norm": 0.099609375, "learning_rate": 0.0011093229252536647, "loss": 1.1679, "step": 7566 }, { "epoch": 0.6635636566921841, "grad_norm": 0.076171875, "learning_rate": 0.0011089493894766579, "loss": 1.1412, "step": 7567 }, { "epoch": 0.6636513484665586, "grad_norm": 0.0517578125, "learning_rate": 0.0011085759030420698, "loss": 1.1892, "step": 7568 }, { "epoch": 0.6637390402409331, "grad_norm": 0.056396484375, "learning_rate": 0.001108202465983962, "loss": 1.1858, "step": 7569 }, { "epoch": 0.6638267320153077, "grad_norm": 0.052978515625, "learning_rate": 0.0011078290783363904, "loss": 1.1783, "step": 7570 }, { "epoch": 0.6639144237896822, "grad_norm": 0.07080078125, "learning_rate": 0.0011074557401334073, "loss": 1.1856, "step": 7571 }, { "epoch": 0.6640021155640567, "grad_norm": 0.0537109375, "learning_rate": 0.00110708245140906, "loss": 1.186, "step": 7572 }, { "epoch": 0.6640898073384314, "grad_norm": 0.08642578125, "learning_rate": 0.0011067092121973916, "loss": 1.1992, "step": 7573 }, { "epoch": 0.6641774991128059, "grad_norm": 0.053955078125, "learning_rate": 0.0011063360225324406, "loss": 1.1313, "step": 7574 }, { "epoch": 0.6642651908871804, "grad_norm": 0.056884765625, "learning_rate": 0.001105962882448241, "loss": 1.1907, "step": 7575 }, { "epoch": 0.664352882661555, "grad_norm": 0.0732421875, "learning_rate": 0.0011055897919788219, "loss": 1.1324, "step": 7576 }, { "epoch": 0.6644405744359295, "grad_norm": 0.049560546875, "learning_rate": 0.0011052167511582088, "loss": 1.2461, "step": 7577 }, { "epoch": 0.664528266210304, "grad_norm": 0.04931640625, "learning_rate": 0.0011048437600204213, "loss": 1.1121, "step": 7578 }, { "epoch": 0.6646159579846787, "grad_norm": 0.0576171875, "learning_rate": 0.0011044708185994757, "loss": 1.1923, "step": 7579 }, { "epoch": 0.6647036497590532, "grad_norm": 0.052734375, "learning_rate": 0.001104097926929383, "loss": 1.1887, "step": 7580 }, { "epoch": 0.6647913415334277, "grad_norm": 0.0634765625, "learning_rate": 0.0011037250850441502, "loss": 1.1408, "step": 7581 }, { "epoch": 0.6648790333078023, "grad_norm": 0.047607421875, "learning_rate": 0.0011033522929777793, "loss": 1.1701, "step": 7582 }, { "epoch": 0.6649667250821768, "grad_norm": 0.05029296875, "learning_rate": 0.0011029795507642686, "loss": 1.1664, "step": 7583 }, { "epoch": 0.6650544168565513, "grad_norm": 0.04736328125, "learning_rate": 0.0011026068584376106, "loss": 1.1017, "step": 7584 }, { "epoch": 0.6651421086309258, "grad_norm": 0.0546875, "learning_rate": 0.0011022342160317941, "loss": 1.2154, "step": 7585 }, { "epoch": 0.6652298004053004, "grad_norm": 0.0654296875, "learning_rate": 0.0011018616235808029, "loss": 1.1788, "step": 7586 }, { "epoch": 0.665317492179675, "grad_norm": 0.048583984375, "learning_rate": 0.0011014890811186161, "loss": 1.1717, "step": 7587 }, { "epoch": 0.6654051839540495, "grad_norm": 0.0712890625, "learning_rate": 0.0011011165886792098, "loss": 1.1717, "step": 7588 }, { "epoch": 0.6654928757284241, "grad_norm": 0.0712890625, "learning_rate": 0.0011007441462965535, "loss": 1.218, "step": 7589 }, { "epoch": 0.6655805675027986, "grad_norm": 0.0546875, "learning_rate": 0.001100371754004613, "loss": 1.106, "step": 7590 }, { "epoch": 0.6656682592771731, "grad_norm": 0.0986328125, "learning_rate": 0.0010999994118373495, "loss": 1.1708, "step": 7591 }, { "epoch": 0.6657559510515477, "grad_norm": 0.080078125, "learning_rate": 0.0010996271198287202, "loss": 1.1525, "step": 7592 }, { "epoch": 0.6658436428259222, "grad_norm": 0.07470703125, "learning_rate": 0.0010992548780126767, "loss": 1.1237, "step": 7593 }, { "epoch": 0.6659313346002967, "grad_norm": 0.08154296875, "learning_rate": 0.0010988826864231664, "loss": 1.1485, "step": 7594 }, { "epoch": 0.6660190263746714, "grad_norm": 0.058837890625, "learning_rate": 0.0010985105450941328, "loss": 1.1746, "step": 7595 }, { "epoch": 0.6661067181490459, "grad_norm": 0.068359375, "learning_rate": 0.0010981384540595133, "loss": 1.1688, "step": 7596 }, { "epoch": 0.6661944099234204, "grad_norm": 0.057373046875, "learning_rate": 0.0010977664133532425, "loss": 1.1655, "step": 7597 }, { "epoch": 0.666282101697795, "grad_norm": 0.051513671875, "learning_rate": 0.0010973944230092492, "loss": 1.1759, "step": 7598 }, { "epoch": 0.6663697934721695, "grad_norm": 0.05126953125, "learning_rate": 0.001097022483061458, "loss": 1.1568, "step": 7599 }, { "epoch": 0.666457485246544, "grad_norm": 0.055419921875, "learning_rate": 0.0010966505935437892, "loss": 1.1502, "step": 7600 }, { "epoch": 0.6665451770209186, "grad_norm": 0.045166015625, "learning_rate": 0.0010962787544901574, "loss": 1.1722, "step": 7601 }, { "epoch": 0.6666328687952932, "grad_norm": 0.062255859375, "learning_rate": 0.001095906965934474, "loss": 1.1666, "step": 7602 }, { "epoch": 0.6667205605696677, "grad_norm": 0.048095703125, "learning_rate": 0.0010955352279106443, "loss": 1.1786, "step": 7603 }, { "epoch": 0.6668082523440423, "grad_norm": 0.050537109375, "learning_rate": 0.0010951635404525708, "loss": 1.1403, "step": 7604 }, { "epoch": 0.6668959441184168, "grad_norm": 0.0625, "learning_rate": 0.0010947919035941504, "loss": 1.2193, "step": 7605 }, { "epoch": 0.6669836358927913, "grad_norm": 0.06787109375, "learning_rate": 0.0010944203173692748, "loss": 1.178, "step": 7606 }, { "epoch": 0.6670713276671659, "grad_norm": 0.048583984375, "learning_rate": 0.001094048781811832, "loss": 1.1369, "step": 7607 }, { "epoch": 0.6671590194415404, "grad_norm": 0.05322265625, "learning_rate": 0.0010936772969557052, "loss": 1.133, "step": 7608 }, { "epoch": 0.667246711215915, "grad_norm": 0.064453125, "learning_rate": 0.0010933058628347727, "loss": 1.1576, "step": 7609 }, { "epoch": 0.6673344029902895, "grad_norm": 0.0791015625, "learning_rate": 0.001092934479482909, "loss": 1.1775, "step": 7610 }, { "epoch": 0.6674220947646641, "grad_norm": 0.06787109375, "learning_rate": 0.001092563146933982, "loss": 1.1364, "step": 7611 }, { "epoch": 0.6675097865390386, "grad_norm": 0.08203125, "learning_rate": 0.0010921918652218574, "loss": 1.1855, "step": 7612 }, { "epoch": 0.6675974783134131, "grad_norm": 0.06396484375, "learning_rate": 0.0010918206343803944, "loss": 1.1494, "step": 7613 }, { "epoch": 0.6676851700877877, "grad_norm": 0.05810546875, "learning_rate": 0.0010914494544434488, "loss": 1.1877, "step": 7614 }, { "epoch": 0.6677728618621622, "grad_norm": 0.072265625, "learning_rate": 0.001091078325444871, "loss": 1.1421, "step": 7615 }, { "epoch": 0.6678605536365367, "grad_norm": 0.05615234375, "learning_rate": 0.0010907072474185071, "loss": 1.1328, "step": 7616 }, { "epoch": 0.6679482454109114, "grad_norm": 0.062255859375, "learning_rate": 0.001090336220398199, "loss": 1.16, "step": 7617 }, { "epoch": 0.6680359371852859, "grad_norm": 0.049072265625, "learning_rate": 0.001089965244417782, "loss": 1.1462, "step": 7618 }, { "epoch": 0.6681236289596604, "grad_norm": 0.095703125, "learning_rate": 0.0010895943195110894, "loss": 1.1861, "step": 7619 }, { "epoch": 0.668211320734035, "grad_norm": 0.049560546875, "learning_rate": 0.0010892234457119479, "loss": 1.1705, "step": 7620 }, { "epoch": 0.6682990125084095, "grad_norm": 0.04638671875, "learning_rate": 0.0010888526230541805, "loss": 1.1279, "step": 7621 }, { "epoch": 0.668386704282784, "grad_norm": 0.0537109375, "learning_rate": 0.0010884818515716056, "loss": 1.1485, "step": 7622 }, { "epoch": 0.6684743960571586, "grad_norm": 0.049072265625, "learning_rate": 0.0010881111312980361, "loss": 1.1847, "step": 7623 }, { "epoch": 0.6685620878315331, "grad_norm": 0.048828125, "learning_rate": 0.001087740462267281, "loss": 1.1499, "step": 7624 }, { "epoch": 0.6686497796059077, "grad_norm": 0.05810546875, "learning_rate": 0.001087369844513144, "loss": 1.2355, "step": 7625 }, { "epoch": 0.6687374713802823, "grad_norm": 0.0458984375, "learning_rate": 0.0010869992780694246, "loss": 1.1362, "step": 7626 }, { "epoch": 0.6688251631546568, "grad_norm": 0.06591796875, "learning_rate": 0.0010866287629699187, "loss": 1.1994, "step": 7627 }, { "epoch": 0.6689128549290313, "grad_norm": 0.05615234375, "learning_rate": 0.0010862582992484146, "loss": 1.1553, "step": 7628 }, { "epoch": 0.6690005467034059, "grad_norm": 0.049072265625, "learning_rate": 0.0010858878869386976, "loss": 1.1266, "step": 7629 }, { "epoch": 0.6690882384777804, "grad_norm": 0.054443359375, "learning_rate": 0.00108551752607455, "loss": 1.1597, "step": 7630 }, { "epoch": 0.6691759302521549, "grad_norm": 0.04638671875, "learning_rate": 0.0010851472166897467, "loss": 1.1467, "step": 7631 }, { "epoch": 0.6692636220265294, "grad_norm": 0.0478515625, "learning_rate": 0.001084776958818059, "loss": 1.2136, "step": 7632 }, { "epoch": 0.6693513138009041, "grad_norm": 0.052978515625, "learning_rate": 0.0010844067524932535, "loss": 1.192, "step": 7633 }, { "epoch": 0.6694390055752786, "grad_norm": 0.049072265625, "learning_rate": 0.0010840365977490924, "loss": 1.1873, "step": 7634 }, { "epoch": 0.6695266973496531, "grad_norm": 0.058349609375, "learning_rate": 0.0010836664946193323, "loss": 1.1756, "step": 7635 }, { "epoch": 0.6696143891240277, "grad_norm": 0.045654296875, "learning_rate": 0.0010832964431377263, "loss": 1.1337, "step": 7636 }, { "epoch": 0.6697020808984022, "grad_norm": 0.056640625, "learning_rate": 0.001082926443338021, "loss": 1.1027, "step": 7637 }, { "epoch": 0.6697897726727767, "grad_norm": 0.053955078125, "learning_rate": 0.0010825564952539604, "loss": 1.1865, "step": 7638 }, { "epoch": 0.6698774644471513, "grad_norm": 0.052001953125, "learning_rate": 0.0010821865989192826, "loss": 1.1582, "step": 7639 }, { "epoch": 0.6699651562215259, "grad_norm": 0.047607421875, "learning_rate": 0.0010818167543677217, "loss": 1.1697, "step": 7640 }, { "epoch": 0.6700528479959004, "grad_norm": 0.048095703125, "learning_rate": 0.0010814469616330054, "loss": 1.1303, "step": 7641 }, { "epoch": 0.670140539770275, "grad_norm": 0.06103515625, "learning_rate": 0.0010810772207488586, "loss": 1.2007, "step": 7642 }, { "epoch": 0.6702282315446495, "grad_norm": 0.050537109375, "learning_rate": 0.0010807075317490003, "loss": 1.1674, "step": 7643 }, { "epoch": 0.670315923319024, "grad_norm": 0.06005859375, "learning_rate": 0.0010803378946671466, "loss": 1.1746, "step": 7644 }, { "epoch": 0.6704036150933986, "grad_norm": 0.0556640625, "learning_rate": 0.0010799683095370051, "loss": 1.214, "step": 7645 }, { "epoch": 0.6704913068677731, "grad_norm": 0.05126953125, "learning_rate": 0.001079598776392283, "loss": 1.2005, "step": 7646 }, { "epoch": 0.6705789986421477, "grad_norm": 0.08056640625, "learning_rate": 0.0010792292952666795, "loss": 1.1531, "step": 7647 }, { "epoch": 0.6706666904165223, "grad_norm": 0.054443359375, "learning_rate": 0.001078859866193891, "loss": 1.156, "step": 7648 }, { "epoch": 0.6707543821908968, "grad_norm": 0.060302734375, "learning_rate": 0.0010784904892076088, "loss": 1.1665, "step": 7649 }, { "epoch": 0.6708420739652713, "grad_norm": 0.05322265625, "learning_rate": 0.0010781211643415183, "loss": 1.144, "step": 7650 }, { "epoch": 0.6709297657396459, "grad_norm": 0.0712890625, "learning_rate": 0.0010777518916293015, "loss": 1.171, "step": 7651 }, { "epoch": 0.6710174575140204, "grad_norm": 0.064453125, "learning_rate": 0.0010773826711046345, "loss": 1.1814, "step": 7652 }, { "epoch": 0.6711051492883949, "grad_norm": 0.0498046875, "learning_rate": 0.0010770135028011897, "loss": 1.1924, "step": 7653 }, { "epoch": 0.6711928410627696, "grad_norm": 0.056396484375, "learning_rate": 0.001076644386752635, "loss": 1.1468, "step": 7654 }, { "epoch": 0.6712805328371441, "grad_norm": 0.049072265625, "learning_rate": 0.0010762753229926316, "loss": 1.1234, "step": 7655 }, { "epoch": 0.6713682246115186, "grad_norm": 0.0517578125, "learning_rate": 0.0010759063115548374, "loss": 1.1978, "step": 7656 }, { "epoch": 0.6714559163858931, "grad_norm": 0.057373046875, "learning_rate": 0.0010755373524729056, "loss": 1.1702, "step": 7657 }, { "epoch": 0.6715436081602677, "grad_norm": 0.058349609375, "learning_rate": 0.0010751684457804848, "loss": 1.1525, "step": 7658 }, { "epoch": 0.6716312999346422, "grad_norm": 0.058837890625, "learning_rate": 0.0010747995915112174, "loss": 1.1314, "step": 7659 }, { "epoch": 0.6717189917090167, "grad_norm": 0.052734375, "learning_rate": 0.0010744307896987425, "loss": 1.179, "step": 7660 }, { "epoch": 0.6718066834833913, "grad_norm": 0.09326171875, "learning_rate": 0.0010740620403766937, "loss": 1.2176, "step": 7661 }, { "epoch": 0.6718943752577659, "grad_norm": 0.059814453125, "learning_rate": 0.0010736933435787006, "loss": 1.1695, "step": 7662 }, { "epoch": 0.6719820670321404, "grad_norm": 0.046142578125, "learning_rate": 0.0010733246993383863, "loss": 1.162, "step": 7663 }, { "epoch": 0.672069758806515, "grad_norm": 0.0478515625, "learning_rate": 0.0010729561076893708, "loss": 1.1512, "step": 7664 }, { "epoch": 0.6721574505808895, "grad_norm": 0.055419921875, "learning_rate": 0.0010725875686652681, "loss": 1.2197, "step": 7665 }, { "epoch": 0.672245142355264, "grad_norm": 0.048583984375, "learning_rate": 0.00107221908229969, "loss": 1.1518, "step": 7666 }, { "epoch": 0.6723328341296386, "grad_norm": 0.05712890625, "learning_rate": 0.001071850648626239, "loss": 1.1769, "step": 7667 }, { "epoch": 0.6724205259040131, "grad_norm": 0.04638671875, "learning_rate": 0.0010714822676785168, "loss": 1.1645, "step": 7668 }, { "epoch": 0.6725082176783876, "grad_norm": 0.046630859375, "learning_rate": 0.001071113939490118, "loss": 1.1282, "step": 7669 }, { "epoch": 0.6725959094527623, "grad_norm": 0.05712890625, "learning_rate": 0.0010707456640946334, "loss": 1.1512, "step": 7670 }, { "epoch": 0.6726836012271368, "grad_norm": 0.050048828125, "learning_rate": 0.0010703774415256496, "loss": 1.2496, "step": 7671 }, { "epoch": 0.6727712930015113, "grad_norm": 0.06884765625, "learning_rate": 0.0010700092718167462, "loss": 1.1348, "step": 7672 }, { "epoch": 0.6728589847758859, "grad_norm": 0.0546875, "learning_rate": 0.0010696411550014998, "loss": 1.1024, "step": 7673 }, { "epoch": 0.6729466765502604, "grad_norm": 0.050537109375, "learning_rate": 0.0010692730911134821, "loss": 1.1927, "step": 7674 }, { "epoch": 0.6730343683246349, "grad_norm": 0.0712890625, "learning_rate": 0.0010689050801862595, "loss": 1.1182, "step": 7675 }, { "epoch": 0.6731220600990095, "grad_norm": 0.06884765625, "learning_rate": 0.0010685371222533939, "loss": 1.1372, "step": 7676 }, { "epoch": 0.673209751873384, "grad_norm": 0.046875, "learning_rate": 0.0010681692173484412, "loss": 1.1415, "step": 7677 }, { "epoch": 0.6732974436477586, "grad_norm": 0.05615234375, "learning_rate": 0.0010678013655049537, "loss": 1.1375, "step": 7678 }, { "epoch": 0.6733851354221332, "grad_norm": 0.08740234375, "learning_rate": 0.0010674335667564788, "loss": 1.2059, "step": 7679 }, { "epoch": 0.6734728271965077, "grad_norm": 0.04736328125, "learning_rate": 0.0010670658211365592, "loss": 1.2143, "step": 7680 }, { "epoch": 0.6735605189708822, "grad_norm": 0.05029296875, "learning_rate": 0.0010666981286787316, "loss": 1.1681, "step": 7681 }, { "epoch": 0.6736482107452567, "grad_norm": 0.052001953125, "learning_rate": 0.0010663304894165287, "loss": 1.1537, "step": 7682 }, { "epoch": 0.6737359025196313, "grad_norm": 0.068359375, "learning_rate": 0.0010659629033834786, "loss": 1.222, "step": 7683 }, { "epoch": 0.6738235942940058, "grad_norm": 0.048828125, "learning_rate": 0.0010655953706131044, "loss": 1.133, "step": 7684 }, { "epoch": 0.6739112860683804, "grad_norm": 0.06201171875, "learning_rate": 0.0010652278911389237, "loss": 1.1721, "step": 7685 }, { "epoch": 0.673998977842755, "grad_norm": 0.053955078125, "learning_rate": 0.0010648604649944494, "loss": 1.169, "step": 7686 }, { "epoch": 0.6740866696171295, "grad_norm": 0.0498046875, "learning_rate": 0.00106449309221319, "loss": 1.1263, "step": 7687 }, { "epoch": 0.674174361391504, "grad_norm": 0.06591796875, "learning_rate": 0.001064125772828649, "loss": 1.1611, "step": 7688 }, { "epoch": 0.6742620531658786, "grad_norm": 0.0537109375, "learning_rate": 0.0010637585068743256, "loss": 1.1271, "step": 7689 }, { "epoch": 0.6743497449402531, "grad_norm": 0.0517578125, "learning_rate": 0.001063391294383713, "loss": 1.1784, "step": 7690 }, { "epoch": 0.6744374367146276, "grad_norm": 0.06689453125, "learning_rate": 0.0010630241353902995, "loss": 1.223, "step": 7691 }, { "epoch": 0.6745251284890023, "grad_norm": 0.05712890625, "learning_rate": 0.00106265702992757, "loss": 1.1829, "step": 7692 }, { "epoch": 0.6746128202633768, "grad_norm": 0.0595703125, "learning_rate": 0.0010622899780290036, "loss": 1.1621, "step": 7693 }, { "epoch": 0.6747005120377513, "grad_norm": 0.0634765625, "learning_rate": 0.0010619229797280735, "loss": 1.1711, "step": 7694 }, { "epoch": 0.6747882038121259, "grad_norm": 0.05029296875, "learning_rate": 0.0010615560350582492, "loss": 1.1713, "step": 7695 }, { "epoch": 0.6748758955865004, "grad_norm": 0.059814453125, "learning_rate": 0.0010611891440529954, "loss": 1.1495, "step": 7696 }, { "epoch": 0.6749635873608749, "grad_norm": 0.04833984375, "learning_rate": 0.0010608223067457726, "loss": 1.1509, "step": 7697 }, { "epoch": 0.6750512791352495, "grad_norm": 0.048095703125, "learning_rate": 0.0010604555231700338, "loss": 1.2224, "step": 7698 }, { "epoch": 0.675138970909624, "grad_norm": 0.05078125, "learning_rate": 0.001060088793359229, "loss": 1.1578, "step": 7699 }, { "epoch": 0.6752266626839986, "grad_norm": 0.0693359375, "learning_rate": 0.0010597221173468039, "loss": 1.1447, "step": 7700 }, { "epoch": 0.6753143544583732, "grad_norm": 0.057861328125, "learning_rate": 0.0010593554951661983, "loss": 1.1636, "step": 7701 }, { "epoch": 0.6754020462327477, "grad_norm": 0.0703125, "learning_rate": 0.001058988926850847, "loss": 1.132, "step": 7702 }, { "epoch": 0.6754897380071222, "grad_norm": 0.083984375, "learning_rate": 0.0010586224124341792, "loss": 1.1624, "step": 7703 }, { "epoch": 0.6755774297814967, "grad_norm": 0.07275390625, "learning_rate": 0.0010582559519496206, "loss": 1.143, "step": 7704 }, { "epoch": 0.6756651215558713, "grad_norm": 0.09619140625, "learning_rate": 0.0010578895454305915, "loss": 1.2315, "step": 7705 }, { "epoch": 0.6757528133302458, "grad_norm": 0.058349609375, "learning_rate": 0.001057523192910508, "loss": 1.1439, "step": 7706 }, { "epoch": 0.6758405051046203, "grad_norm": 0.05810546875, "learning_rate": 0.0010571568944227796, "loss": 1.0811, "step": 7707 }, { "epoch": 0.675928196878995, "grad_norm": 0.0478515625, "learning_rate": 0.0010567906500008116, "loss": 1.1731, "step": 7708 }, { "epoch": 0.6760158886533695, "grad_norm": 0.051025390625, "learning_rate": 0.0010564244596780053, "loss": 1.1968, "step": 7709 }, { "epoch": 0.676103580427744, "grad_norm": 0.056640625, "learning_rate": 0.001056058323487756, "loss": 1.1402, "step": 7710 }, { "epoch": 0.6761912722021186, "grad_norm": 0.051025390625, "learning_rate": 0.0010556922414634545, "loss": 1.1975, "step": 7711 }, { "epoch": 0.6762789639764931, "grad_norm": 0.0498046875, "learning_rate": 0.0010553262136384863, "loss": 1.151, "step": 7712 }, { "epoch": 0.6763666557508676, "grad_norm": 0.05126953125, "learning_rate": 0.0010549602400462318, "loss": 1.1918, "step": 7713 }, { "epoch": 0.6764543475252422, "grad_norm": 0.04736328125, "learning_rate": 0.0010545943207200675, "loss": 1.1365, "step": 7714 }, { "epoch": 0.6765420392996168, "grad_norm": 0.0537109375, "learning_rate": 0.0010542284556933647, "loss": 1.2208, "step": 7715 }, { "epoch": 0.6766297310739913, "grad_norm": 0.045654296875, "learning_rate": 0.0010538626449994883, "loss": 1.1555, "step": 7716 }, { "epoch": 0.6767174228483659, "grad_norm": 0.049560546875, "learning_rate": 0.0010534968886717995, "loss": 1.1364, "step": 7717 }, { "epoch": 0.6768051146227404, "grad_norm": 0.04931640625, "learning_rate": 0.0010531311867436556, "loss": 1.142, "step": 7718 }, { "epoch": 0.6768928063971149, "grad_norm": 0.051025390625, "learning_rate": 0.0010527655392484053, "loss": 1.1851, "step": 7719 }, { "epoch": 0.6769804981714895, "grad_norm": 0.072265625, "learning_rate": 0.0010523999462193974, "loss": 1.1761, "step": 7720 }, { "epoch": 0.677068189945864, "grad_norm": 0.049560546875, "learning_rate": 0.0010520344076899708, "loss": 1.1777, "step": 7721 }, { "epoch": 0.6771558817202386, "grad_norm": 0.04296875, "learning_rate": 0.0010516689236934626, "loss": 1.0896, "step": 7722 }, { "epoch": 0.6772435734946132, "grad_norm": 0.05078125, "learning_rate": 0.0010513034942632043, "loss": 1.1649, "step": 7723 }, { "epoch": 0.6773312652689877, "grad_norm": 0.0498046875, "learning_rate": 0.0010509381194325218, "loss": 1.1221, "step": 7724 }, { "epoch": 0.6774189570433622, "grad_norm": 0.056396484375, "learning_rate": 0.001050572799234736, "loss": 1.1973, "step": 7725 }, { "epoch": 0.6775066488177368, "grad_norm": 0.05419921875, "learning_rate": 0.0010502075337031635, "loss": 1.2064, "step": 7726 }, { "epoch": 0.6775943405921113, "grad_norm": 0.05029296875, "learning_rate": 0.001049842322871116, "loss": 1.1645, "step": 7727 }, { "epoch": 0.6776820323664858, "grad_norm": 0.0546875, "learning_rate": 0.0010494771667718989, "loss": 1.2602, "step": 7728 }, { "epoch": 0.6777697241408603, "grad_norm": 0.052001953125, "learning_rate": 0.0010491120654388146, "loss": 1.207, "step": 7729 }, { "epoch": 0.677857415915235, "grad_norm": 0.059814453125, "learning_rate": 0.001048747018905158, "loss": 1.1972, "step": 7730 }, { "epoch": 0.6779451076896095, "grad_norm": 0.049560546875, "learning_rate": 0.0010483820272042216, "loss": 1.1917, "step": 7731 }, { "epoch": 0.678032799463984, "grad_norm": 0.0537109375, "learning_rate": 0.0010480170903692909, "loss": 1.2119, "step": 7732 }, { "epoch": 0.6781204912383586, "grad_norm": 0.072265625, "learning_rate": 0.0010476522084336482, "loss": 1.1719, "step": 7733 }, { "epoch": 0.6782081830127331, "grad_norm": 0.04833984375, "learning_rate": 0.0010472873814305684, "loss": 1.1747, "step": 7734 }, { "epoch": 0.6782958747871076, "grad_norm": 0.0693359375, "learning_rate": 0.0010469226093933245, "loss": 1.2123, "step": 7735 }, { "epoch": 0.6783835665614822, "grad_norm": 0.052001953125, "learning_rate": 0.001046557892355181, "loss": 1.101, "step": 7736 }, { "epoch": 0.6784712583358568, "grad_norm": 0.053955078125, "learning_rate": 0.0010461932303493999, "loss": 1.2036, "step": 7737 }, { "epoch": 0.6785589501102313, "grad_norm": 0.056396484375, "learning_rate": 0.0010458286234092378, "loss": 1.1911, "step": 7738 }, { "epoch": 0.6786466418846059, "grad_norm": 0.054931640625, "learning_rate": 0.0010454640715679452, "loss": 1.1146, "step": 7739 }, { "epoch": 0.6787343336589804, "grad_norm": 0.05224609375, "learning_rate": 0.001045099574858769, "loss": 1.1614, "step": 7740 }, { "epoch": 0.6788220254333549, "grad_norm": 0.0556640625, "learning_rate": 0.0010447351333149498, "loss": 1.1795, "step": 7741 }, { "epoch": 0.6789097172077295, "grad_norm": 0.06982421875, "learning_rate": 0.0010443707469697245, "loss": 1.1844, "step": 7742 }, { "epoch": 0.678997408982104, "grad_norm": 0.0546875, "learning_rate": 0.001044006415856323, "loss": 1.2397, "step": 7743 }, { "epoch": 0.6790851007564785, "grad_norm": 0.049072265625, "learning_rate": 0.001043642140007972, "loss": 1.1726, "step": 7744 }, { "epoch": 0.6791727925308532, "grad_norm": 0.07080078125, "learning_rate": 0.0010432779194578926, "loss": 1.2024, "step": 7745 }, { "epoch": 0.6792604843052277, "grad_norm": 0.05712890625, "learning_rate": 0.0010429137542393014, "loss": 1.1542, "step": 7746 }, { "epoch": 0.6793481760796022, "grad_norm": 0.046875, "learning_rate": 0.001042549644385408, "loss": 1.2067, "step": 7747 }, { "epoch": 0.6794358678539768, "grad_norm": 0.055908203125, "learning_rate": 0.0010421855899294188, "loss": 1.137, "step": 7748 }, { "epoch": 0.6795235596283513, "grad_norm": 0.056396484375, "learning_rate": 0.0010418215909045348, "loss": 1.1481, "step": 7749 }, { "epoch": 0.6796112514027258, "grad_norm": 0.051513671875, "learning_rate": 0.0010414576473439523, "loss": 1.1662, "step": 7750 }, { "epoch": 0.6796989431771003, "grad_norm": 0.07080078125, "learning_rate": 0.0010410937592808607, "loss": 1.118, "step": 7751 }, { "epoch": 0.679786634951475, "grad_norm": 0.06103515625, "learning_rate": 0.001040729926748447, "loss": 1.1913, "step": 7752 }, { "epoch": 0.6798743267258495, "grad_norm": 0.0771484375, "learning_rate": 0.0010403661497798908, "loss": 1.1406, "step": 7753 }, { "epoch": 0.679962018500224, "grad_norm": 0.07568359375, "learning_rate": 0.0010400024284083677, "loss": 1.2006, "step": 7754 }, { "epoch": 0.6800497102745986, "grad_norm": 0.05859375, "learning_rate": 0.0010396387626670494, "loss": 1.1949, "step": 7755 }, { "epoch": 0.6801374020489731, "grad_norm": 0.07080078125, "learning_rate": 0.0010392751525890992, "loss": 1.1434, "step": 7756 }, { "epoch": 0.6802250938233476, "grad_norm": 0.064453125, "learning_rate": 0.0010389115982076787, "loss": 1.1849, "step": 7757 }, { "epoch": 0.6803127855977222, "grad_norm": 0.0830078125, "learning_rate": 0.0010385480995559432, "loss": 1.1823, "step": 7758 }, { "epoch": 0.6804004773720967, "grad_norm": 0.060791015625, "learning_rate": 0.0010381846566670425, "loss": 1.0831, "step": 7759 }, { "epoch": 0.6804881691464713, "grad_norm": 0.09619140625, "learning_rate": 0.001037821269574122, "loss": 1.1935, "step": 7760 }, { "epoch": 0.6805758609208459, "grad_norm": 0.10693359375, "learning_rate": 0.0010374579383103208, "loss": 1.1621, "step": 7761 }, { "epoch": 0.6806635526952204, "grad_norm": 0.059326171875, "learning_rate": 0.0010370946629087747, "loss": 1.1264, "step": 7762 }, { "epoch": 0.6807512444695949, "grad_norm": 0.0966796875, "learning_rate": 0.0010367314434026127, "loss": 1.1196, "step": 7763 }, { "epoch": 0.6808389362439695, "grad_norm": 0.109375, "learning_rate": 0.001036368279824961, "loss": 1.2156, "step": 7764 }, { "epoch": 0.680926628018344, "grad_norm": 0.068359375, "learning_rate": 0.001036005172208937, "loss": 1.1695, "step": 7765 }, { "epoch": 0.6810143197927185, "grad_norm": 0.11328125, "learning_rate": 0.0010356421205876566, "loss": 1.2165, "step": 7766 }, { "epoch": 0.6811020115670932, "grad_norm": 0.08251953125, "learning_rate": 0.0010352791249942288, "loss": 1.177, "step": 7767 }, { "epoch": 0.6811897033414677, "grad_norm": 0.0634765625, "learning_rate": 0.0010349161854617582, "loss": 1.2447, "step": 7768 }, { "epoch": 0.6812773951158422, "grad_norm": 0.05517578125, "learning_rate": 0.0010345533020233442, "loss": 1.1668, "step": 7769 }, { "epoch": 0.6813650868902168, "grad_norm": 0.08984375, "learning_rate": 0.0010341904747120793, "loss": 1.185, "step": 7770 }, { "epoch": 0.6814527786645913, "grad_norm": 0.0888671875, "learning_rate": 0.0010338277035610536, "loss": 1.1477, "step": 7771 }, { "epoch": 0.6815404704389658, "grad_norm": 0.050537109375, "learning_rate": 0.0010334649886033507, "loss": 1.1317, "step": 7772 }, { "epoch": 0.6816281622133404, "grad_norm": 0.08642578125, "learning_rate": 0.0010331023298720497, "loss": 1.2403, "step": 7773 }, { "epoch": 0.681715853987715, "grad_norm": 0.051513671875, "learning_rate": 0.0010327397274002234, "loss": 1.2217, "step": 7774 }, { "epoch": 0.6818035457620895, "grad_norm": 0.06298828125, "learning_rate": 0.00103237718122094, "loss": 1.1547, "step": 7775 }, { "epoch": 0.681891237536464, "grad_norm": 0.059326171875, "learning_rate": 0.001032014691367264, "loss": 1.1425, "step": 7776 }, { "epoch": 0.6819789293108386, "grad_norm": 0.072265625, "learning_rate": 0.0010316522578722527, "loss": 1.1884, "step": 7777 }, { "epoch": 0.6820666210852131, "grad_norm": 0.046630859375, "learning_rate": 0.001031289880768959, "loss": 1.1743, "step": 7778 }, { "epoch": 0.6821543128595876, "grad_norm": 0.052001953125, "learning_rate": 0.001030927560090431, "loss": 1.2238, "step": 7779 }, { "epoch": 0.6822420046339622, "grad_norm": 0.048828125, "learning_rate": 0.001030565295869711, "loss": 1.1364, "step": 7780 }, { "epoch": 0.6823296964083367, "grad_norm": 0.050048828125, "learning_rate": 0.001030203088139838, "loss": 1.191, "step": 7781 }, { "epoch": 0.6824173881827112, "grad_norm": 0.049072265625, "learning_rate": 0.0010298409369338425, "loss": 1.2592, "step": 7782 }, { "epoch": 0.6825050799570859, "grad_norm": 0.052734375, "learning_rate": 0.001029478842284753, "loss": 1.2156, "step": 7783 }, { "epoch": 0.6825927717314604, "grad_norm": 0.06689453125, "learning_rate": 0.0010291168042255907, "loss": 1.1371, "step": 7784 }, { "epoch": 0.6826804635058349, "grad_norm": 0.047607421875, "learning_rate": 0.0010287548227893735, "loss": 1.1553, "step": 7785 }, { "epoch": 0.6827681552802095, "grad_norm": 0.054443359375, "learning_rate": 0.0010283928980091132, "loss": 1.1452, "step": 7786 }, { "epoch": 0.682855847054584, "grad_norm": 0.06787109375, "learning_rate": 0.0010280310299178158, "loss": 1.1205, "step": 7787 }, { "epoch": 0.6829435388289585, "grad_norm": 0.046630859375, "learning_rate": 0.0010276692185484822, "loss": 1.161, "step": 7788 }, { "epoch": 0.6830312306033332, "grad_norm": 0.051513671875, "learning_rate": 0.0010273074639341095, "loss": 1.1977, "step": 7789 }, { "epoch": 0.6831189223777077, "grad_norm": 0.0615234375, "learning_rate": 0.0010269457661076888, "loss": 1.1944, "step": 7790 }, { "epoch": 0.6832066141520822, "grad_norm": 0.061279296875, "learning_rate": 0.0010265841251022058, "loss": 1.1523, "step": 7791 }, { "epoch": 0.6832943059264568, "grad_norm": 0.049072265625, "learning_rate": 0.0010262225409506413, "loss": 1.1861, "step": 7792 }, { "epoch": 0.6833819977008313, "grad_norm": 0.052001953125, "learning_rate": 0.0010258610136859704, "loss": 1.1505, "step": 7793 }, { "epoch": 0.6834696894752058, "grad_norm": 0.0546875, "learning_rate": 0.0010254995433411642, "loss": 1.0927, "step": 7794 }, { "epoch": 0.6835573812495804, "grad_norm": 0.05419921875, "learning_rate": 0.001025138129949188, "loss": 1.1997, "step": 7795 }, { "epoch": 0.6836450730239549, "grad_norm": 0.060791015625, "learning_rate": 0.0010247767735430003, "loss": 1.1763, "step": 7796 }, { "epoch": 0.6837327647983295, "grad_norm": 0.0634765625, "learning_rate": 0.0010244154741555574, "loss": 1.183, "step": 7797 }, { "epoch": 0.6838204565727041, "grad_norm": 0.04931640625, "learning_rate": 0.0010240542318198079, "loss": 1.147, "step": 7798 }, { "epoch": 0.6839081483470786, "grad_norm": 0.057861328125, "learning_rate": 0.0010236930465686972, "loss": 1.2244, "step": 7799 }, { "epoch": 0.6839958401214531, "grad_norm": 0.050537109375, "learning_rate": 0.0010233319184351636, "loss": 1.1487, "step": 7800 }, { "epoch": 0.6840835318958276, "grad_norm": 0.056640625, "learning_rate": 0.001022970847452141, "loss": 1.1376, "step": 7801 }, { "epoch": 0.6841712236702022, "grad_norm": 0.0458984375, "learning_rate": 0.0010226098336525587, "loss": 1.226, "step": 7802 }, { "epoch": 0.6842589154445767, "grad_norm": 0.053955078125, "learning_rate": 0.0010222488770693402, "loss": 1.1607, "step": 7803 }, { "epoch": 0.6843466072189512, "grad_norm": 0.04931640625, "learning_rate": 0.001021887977735404, "loss": 1.1227, "step": 7804 }, { "epoch": 0.6844342989933259, "grad_norm": 0.07470703125, "learning_rate": 0.0010215271356836617, "loss": 1.1505, "step": 7805 }, { "epoch": 0.6845219907677004, "grad_norm": 0.05517578125, "learning_rate": 0.0010211663509470227, "loss": 1.2027, "step": 7806 }, { "epoch": 0.6846096825420749, "grad_norm": 0.07177734375, "learning_rate": 0.0010208056235583888, "loss": 1.2282, "step": 7807 }, { "epoch": 0.6846973743164495, "grad_norm": 0.0791015625, "learning_rate": 0.0010204449535506585, "loss": 1.2021, "step": 7808 }, { "epoch": 0.684785066090824, "grad_norm": 0.05859375, "learning_rate": 0.0010200843409567228, "loss": 1.2346, "step": 7809 }, { "epoch": 0.6848727578651985, "grad_norm": 0.056640625, "learning_rate": 0.001019723785809469, "loss": 1.1765, "step": 7810 }, { "epoch": 0.6849604496395731, "grad_norm": 0.06201171875, "learning_rate": 0.001019363288141779, "loss": 1.1508, "step": 7811 }, { "epoch": 0.6850481414139477, "grad_norm": 0.06005859375, "learning_rate": 0.0010190028479865286, "loss": 1.1796, "step": 7812 }, { "epoch": 0.6851358331883222, "grad_norm": 0.06005859375, "learning_rate": 0.0010186424653765905, "loss": 1.1717, "step": 7813 }, { "epoch": 0.6852235249626968, "grad_norm": 0.07275390625, "learning_rate": 0.0010182821403448294, "loss": 1.1739, "step": 7814 }, { "epoch": 0.6853112167370713, "grad_norm": 0.046630859375, "learning_rate": 0.001017921872924106, "loss": 1.1601, "step": 7815 }, { "epoch": 0.6853989085114458, "grad_norm": 0.048828125, "learning_rate": 0.0010175616631472762, "loss": 1.1553, "step": 7816 }, { "epoch": 0.6854866002858204, "grad_norm": 0.05615234375, "learning_rate": 0.0010172015110471906, "loss": 1.1416, "step": 7817 }, { "epoch": 0.6855742920601949, "grad_norm": 0.0556640625, "learning_rate": 0.0010168414166566932, "loss": 1.1277, "step": 7818 }, { "epoch": 0.6856619838345694, "grad_norm": 0.072265625, "learning_rate": 0.0010164813800086238, "loss": 1.2275, "step": 7819 }, { "epoch": 0.6857496756089441, "grad_norm": 0.05322265625, "learning_rate": 0.0010161214011358182, "loss": 1.1952, "step": 7820 }, { "epoch": 0.6858373673833186, "grad_norm": 0.0537109375, "learning_rate": 0.0010157614800711035, "loss": 1.1654, "step": 7821 }, { "epoch": 0.6859250591576931, "grad_norm": 0.044677734375, "learning_rate": 0.0010154016168473054, "loss": 1.1479, "step": 7822 }, { "epoch": 0.6860127509320676, "grad_norm": 0.0556640625, "learning_rate": 0.0010150418114972409, "loss": 1.1534, "step": 7823 }, { "epoch": 0.6861004427064422, "grad_norm": 0.048095703125, "learning_rate": 0.0010146820640537237, "loss": 1.2334, "step": 7824 }, { "epoch": 0.6861881344808167, "grad_norm": 0.04638671875, "learning_rate": 0.0010143223745495626, "loss": 1.1453, "step": 7825 }, { "epoch": 0.6862758262551912, "grad_norm": 0.080078125, "learning_rate": 0.0010139627430175604, "loss": 1.2304, "step": 7826 }, { "epoch": 0.6863635180295659, "grad_norm": 0.052734375, "learning_rate": 0.0010136031694905137, "loss": 1.2075, "step": 7827 }, { "epoch": 0.6864512098039404, "grad_norm": 0.054931640625, "learning_rate": 0.001013243654001215, "loss": 1.1798, "step": 7828 }, { "epoch": 0.6865389015783149, "grad_norm": 0.07666015625, "learning_rate": 0.0010128841965824512, "loss": 1.1944, "step": 7829 }, { "epoch": 0.6866265933526895, "grad_norm": 0.05712890625, "learning_rate": 0.0010125247972670045, "loss": 1.2052, "step": 7830 }, { "epoch": 0.686714285127064, "grad_norm": 0.050048828125, "learning_rate": 0.0010121654560876503, "loss": 1.1967, "step": 7831 }, { "epoch": 0.6868019769014385, "grad_norm": 0.055419921875, "learning_rate": 0.0010118061730771599, "loss": 1.1353, "step": 7832 }, { "epoch": 0.6868896686758131, "grad_norm": 0.057861328125, "learning_rate": 0.001011446948268299, "loss": 1.1943, "step": 7833 }, { "epoch": 0.6869773604501876, "grad_norm": 0.054443359375, "learning_rate": 0.0010110877816938285, "loss": 1.1773, "step": 7834 }, { "epoch": 0.6870650522245622, "grad_norm": 0.04736328125, "learning_rate": 0.0010107286733865024, "loss": 1.2001, "step": 7835 }, { "epoch": 0.6871527439989368, "grad_norm": 0.0478515625, "learning_rate": 0.001010369623379071, "loss": 1.166, "step": 7836 }, { "epoch": 0.6872404357733113, "grad_norm": 0.0517578125, "learning_rate": 0.0010100106317042794, "loss": 1.1819, "step": 7837 }, { "epoch": 0.6873281275476858, "grad_norm": 0.05810546875, "learning_rate": 0.0010096516983948652, "loss": 1.1831, "step": 7838 }, { "epoch": 0.6874158193220604, "grad_norm": 0.0595703125, "learning_rate": 0.0010092928234835634, "loss": 1.218, "step": 7839 }, { "epoch": 0.6875035110964349, "grad_norm": 0.0517578125, "learning_rate": 0.0010089340070031027, "loss": 1.142, "step": 7840 }, { "epoch": 0.6875912028708094, "grad_norm": 0.056640625, "learning_rate": 0.001008575248986205, "loss": 1.1961, "step": 7841 }, { "epoch": 0.6876788946451841, "grad_norm": 0.0625, "learning_rate": 0.0010082165494655886, "loss": 1.1309, "step": 7842 }, { "epoch": 0.6877665864195586, "grad_norm": 0.05908203125, "learning_rate": 0.0010078579084739664, "loss": 1.1507, "step": 7843 }, { "epoch": 0.6878542781939331, "grad_norm": 0.0478515625, "learning_rate": 0.0010074993260440455, "loss": 1.1802, "step": 7844 }, { "epoch": 0.6879419699683077, "grad_norm": 0.051025390625, "learning_rate": 0.0010071408022085272, "loss": 1.2817, "step": 7845 }, { "epoch": 0.6880296617426822, "grad_norm": 0.05126953125, "learning_rate": 0.0010067823370001083, "loss": 1.2218, "step": 7846 }, { "epoch": 0.6881173535170567, "grad_norm": 0.048095703125, "learning_rate": 0.0010064239304514795, "loss": 1.1078, "step": 7847 }, { "epoch": 0.6882050452914312, "grad_norm": 0.0576171875, "learning_rate": 0.0010060655825953274, "loss": 1.1838, "step": 7848 }, { "epoch": 0.6882927370658058, "grad_norm": 0.046142578125, "learning_rate": 0.0010057072934643316, "loss": 1.1791, "step": 7849 }, { "epoch": 0.6883804288401804, "grad_norm": 0.05419921875, "learning_rate": 0.0010053490630911673, "loss": 1.2187, "step": 7850 }, { "epoch": 0.6884681206145549, "grad_norm": 0.046142578125, "learning_rate": 0.0010049908915085043, "loss": 1.1275, "step": 7851 }, { "epoch": 0.6885558123889295, "grad_norm": 0.07275390625, "learning_rate": 0.0010046327787490074, "loss": 1.183, "step": 7852 }, { "epoch": 0.688643504163304, "grad_norm": 0.05322265625, "learning_rate": 0.0010042747248453345, "loss": 1.1878, "step": 7853 }, { "epoch": 0.6887311959376785, "grad_norm": 0.052734375, "learning_rate": 0.0010039167298301402, "loss": 1.1179, "step": 7854 }, { "epoch": 0.6888188877120531, "grad_norm": 0.051025390625, "learning_rate": 0.001003558793736072, "loss": 1.1906, "step": 7855 }, { "epoch": 0.6889065794864276, "grad_norm": 0.072265625, "learning_rate": 0.0010032009165957727, "loss": 1.2022, "step": 7856 }, { "epoch": 0.6889942712608021, "grad_norm": 0.053955078125, "learning_rate": 0.001002843098441881, "loss": 1.1866, "step": 7857 }, { "epoch": 0.6890819630351768, "grad_norm": 0.053466796875, "learning_rate": 0.0010024853393070274, "loss": 1.1605, "step": 7858 }, { "epoch": 0.6891696548095513, "grad_norm": 0.051513671875, "learning_rate": 0.0010021276392238392, "loss": 1.1927, "step": 7859 }, { "epoch": 0.6892573465839258, "grad_norm": 0.09814453125, "learning_rate": 0.001001769998224938, "loss": 1.1833, "step": 7860 }, { "epoch": 0.6893450383583004, "grad_norm": 0.05029296875, "learning_rate": 0.00100141241634294, "loss": 1.1448, "step": 7861 }, { "epoch": 0.6894327301326749, "grad_norm": 0.059814453125, "learning_rate": 0.0010010548936104549, "loss": 1.1998, "step": 7862 }, { "epoch": 0.6895204219070494, "grad_norm": 0.052734375, "learning_rate": 0.001000697430060088, "loss": 1.1464, "step": 7863 }, { "epoch": 0.689608113681424, "grad_norm": 0.06396484375, "learning_rate": 0.0010003400257244395, "loss": 1.1326, "step": 7864 }, { "epoch": 0.6896958054557986, "grad_norm": 0.095703125, "learning_rate": 0.0009999826806361036, "loss": 1.1345, "step": 7865 }, { "epoch": 0.6897834972301731, "grad_norm": 0.0830078125, "learning_rate": 0.0009996253948276698, "loss": 1.2134, "step": 7866 }, { "epoch": 0.6898711890045477, "grad_norm": 0.06884765625, "learning_rate": 0.0009992681683317203, "loss": 1.1524, "step": 7867 }, { "epoch": 0.6899588807789222, "grad_norm": 0.0908203125, "learning_rate": 0.0009989110011808342, "loss": 1.1661, "step": 7868 }, { "epoch": 0.6900465725532967, "grad_norm": 0.054931640625, "learning_rate": 0.0009985538934075837, "loss": 1.0934, "step": 7869 }, { "epoch": 0.6901342643276713, "grad_norm": 0.06298828125, "learning_rate": 0.0009981968450445372, "loss": 1.1519, "step": 7870 }, { "epoch": 0.6902219561020458, "grad_norm": 0.05517578125, "learning_rate": 0.000997839856124256, "loss": 1.2282, "step": 7871 }, { "epoch": 0.6903096478764204, "grad_norm": 0.0947265625, "learning_rate": 0.0009974829266792954, "loss": 1.1629, "step": 7872 }, { "epoch": 0.6903973396507949, "grad_norm": 0.09521484375, "learning_rate": 0.0009971260567422078, "loss": 1.1473, "step": 7873 }, { "epoch": 0.6904850314251695, "grad_norm": 0.0703125, "learning_rate": 0.0009967692463455384, "loss": 1.1496, "step": 7874 }, { "epoch": 0.690572723199544, "grad_norm": 0.0703125, "learning_rate": 0.0009964124955218277, "loss": 1.1355, "step": 7875 }, { "epoch": 0.6906604149739185, "grad_norm": 0.0751953125, "learning_rate": 0.0009960558043036099, "loss": 1.1202, "step": 7876 }, { "epoch": 0.6907481067482931, "grad_norm": 0.07568359375, "learning_rate": 0.0009956991727234144, "loss": 1.1179, "step": 7877 }, { "epoch": 0.6908357985226676, "grad_norm": 0.0498046875, "learning_rate": 0.0009953426008137655, "loss": 1.1413, "step": 7878 }, { "epoch": 0.6909234902970421, "grad_norm": 0.0546875, "learning_rate": 0.0009949860886071817, "loss": 1.2019, "step": 7879 }, { "epoch": 0.6910111820714168, "grad_norm": 0.046875, "learning_rate": 0.0009946296361361751, "loss": 1.1615, "step": 7880 }, { "epoch": 0.6910988738457913, "grad_norm": 0.06982421875, "learning_rate": 0.0009942732434332544, "loss": 1.1545, "step": 7881 }, { "epoch": 0.6911865656201658, "grad_norm": 0.053955078125, "learning_rate": 0.000993916910530921, "loss": 1.1916, "step": 7882 }, { "epoch": 0.6912742573945404, "grad_norm": 0.050048828125, "learning_rate": 0.0009935606374616717, "loss": 1.1626, "step": 7883 }, { "epoch": 0.6913619491689149, "grad_norm": 0.047607421875, "learning_rate": 0.0009932044242579976, "loss": 1.2556, "step": 7884 }, { "epoch": 0.6914496409432894, "grad_norm": 0.058837890625, "learning_rate": 0.0009928482709523844, "loss": 1.2152, "step": 7885 }, { "epoch": 0.691537332717664, "grad_norm": 0.05419921875, "learning_rate": 0.0009924921775773124, "loss": 1.1518, "step": 7886 }, { "epoch": 0.6916250244920386, "grad_norm": 0.0693359375, "learning_rate": 0.000992136144165257, "loss": 1.1902, "step": 7887 }, { "epoch": 0.6917127162664131, "grad_norm": 0.05712890625, "learning_rate": 0.0009917801707486872, "loss": 1.1379, "step": 7888 }, { "epoch": 0.6918004080407877, "grad_norm": 0.057861328125, "learning_rate": 0.0009914242573600656, "loss": 1.1534, "step": 7889 }, { "epoch": 0.6918880998151622, "grad_norm": 0.050537109375, "learning_rate": 0.0009910684040318524, "loss": 1.1918, "step": 7890 }, { "epoch": 0.6919757915895367, "grad_norm": 0.0517578125, "learning_rate": 0.0009907126107964988, "loss": 1.219, "step": 7891 }, { "epoch": 0.6920634833639113, "grad_norm": 0.056396484375, "learning_rate": 0.0009903568776864539, "loss": 1.2361, "step": 7892 }, { "epoch": 0.6921511751382858, "grad_norm": 0.05810546875, "learning_rate": 0.000990001204734159, "loss": 1.1436, "step": 7893 }, { "epoch": 0.6922388669126603, "grad_norm": 0.053466796875, "learning_rate": 0.00098964559197205, "loss": 1.1643, "step": 7894 }, { "epoch": 0.6923265586870349, "grad_norm": 0.0458984375, "learning_rate": 0.0009892900394325584, "loss": 1.147, "step": 7895 }, { "epoch": 0.6924142504614095, "grad_norm": 0.05615234375, "learning_rate": 0.0009889345471481095, "loss": 1.1912, "step": 7896 }, { "epoch": 0.692501942235784, "grad_norm": 0.053955078125, "learning_rate": 0.0009885791151511242, "loss": 1.1773, "step": 7897 }, { "epoch": 0.6925896340101585, "grad_norm": 0.047119140625, "learning_rate": 0.0009882237434740153, "loss": 1.1853, "step": 7898 }, { "epoch": 0.6926773257845331, "grad_norm": 0.0546875, "learning_rate": 0.0009878684321491928, "loss": 1.2441, "step": 7899 }, { "epoch": 0.6927650175589076, "grad_norm": 0.05078125, "learning_rate": 0.0009875131812090597, "loss": 1.1106, "step": 7900 }, { "epoch": 0.6928527093332821, "grad_norm": 0.046142578125, "learning_rate": 0.0009871579906860152, "loss": 1.1541, "step": 7901 }, { "epoch": 0.6929404011076568, "grad_norm": 0.0625, "learning_rate": 0.0009868028606124502, "loss": 1.1978, "step": 7902 }, { "epoch": 0.6930280928820313, "grad_norm": 0.060791015625, "learning_rate": 0.000986447791020752, "loss": 1.1594, "step": 7903 }, { "epoch": 0.6931157846564058, "grad_norm": 0.052734375, "learning_rate": 0.0009860927819433027, "loss": 1.1643, "step": 7904 }, { "epoch": 0.6932034764307804, "grad_norm": 0.052490234375, "learning_rate": 0.0009857378334124781, "loss": 1.1657, "step": 7905 }, { "epoch": 0.6932911682051549, "grad_norm": 0.056884765625, "learning_rate": 0.0009853829454606483, "loss": 1.1681, "step": 7906 }, { "epoch": 0.6933788599795294, "grad_norm": 0.052978515625, "learning_rate": 0.0009850281181201777, "loss": 1.1625, "step": 7907 }, { "epoch": 0.693466551753904, "grad_norm": 0.056640625, "learning_rate": 0.0009846733514234263, "loss": 1.1691, "step": 7908 }, { "epoch": 0.6935542435282785, "grad_norm": 0.051025390625, "learning_rate": 0.0009843186454027476, "loss": 1.1444, "step": 7909 }, { "epoch": 0.6936419353026531, "grad_norm": 0.059814453125, "learning_rate": 0.0009839640000904905, "loss": 1.2035, "step": 7910 }, { "epoch": 0.6937296270770277, "grad_norm": 0.049560546875, "learning_rate": 0.0009836094155189965, "loss": 1.1555, "step": 7911 }, { "epoch": 0.6938173188514022, "grad_norm": 0.054443359375, "learning_rate": 0.0009832548917206039, "loss": 1.2188, "step": 7912 }, { "epoch": 0.6939050106257767, "grad_norm": 0.053466796875, "learning_rate": 0.0009829004287276438, "loss": 1.12, "step": 7913 }, { "epoch": 0.6939927024001513, "grad_norm": 0.06640625, "learning_rate": 0.000982546026572443, "loss": 1.1519, "step": 7914 }, { "epoch": 0.6940803941745258, "grad_norm": 0.045166015625, "learning_rate": 0.000982191685287321, "loss": 1.196, "step": 7915 }, { "epoch": 0.6941680859489003, "grad_norm": 0.04736328125, "learning_rate": 0.0009818374049045933, "loss": 1.1174, "step": 7916 }, { "epoch": 0.694255777723275, "grad_norm": 0.072265625, "learning_rate": 0.0009814831854565696, "loss": 1.2508, "step": 7917 }, { "epoch": 0.6943434694976495, "grad_norm": 0.062255859375, "learning_rate": 0.0009811290269755534, "loss": 1.1143, "step": 7918 }, { "epoch": 0.694431161272024, "grad_norm": 0.048828125, "learning_rate": 0.0009807749294938439, "loss": 1.1253, "step": 7919 }, { "epoch": 0.6945188530463985, "grad_norm": 0.052001953125, "learning_rate": 0.0009804208930437328, "loss": 1.174, "step": 7920 }, { "epoch": 0.6946065448207731, "grad_norm": 0.052490234375, "learning_rate": 0.0009800669176575076, "loss": 1.1475, "step": 7921 }, { "epoch": 0.6946942365951476, "grad_norm": 0.046142578125, "learning_rate": 0.0009797130033674505, "loss": 1.1411, "step": 7922 }, { "epoch": 0.6947819283695221, "grad_norm": 0.052978515625, "learning_rate": 0.0009793591502058369, "loss": 1.2073, "step": 7923 }, { "epoch": 0.6948696201438967, "grad_norm": 0.0517578125, "learning_rate": 0.0009790053582049382, "loss": 1.1511, "step": 7924 }, { "epoch": 0.6949573119182713, "grad_norm": 0.054931640625, "learning_rate": 0.0009786516273970177, "loss": 1.1829, "step": 7925 }, { "epoch": 0.6950450036926458, "grad_norm": 0.046630859375, "learning_rate": 0.000978297957814336, "loss": 1.1423, "step": 7926 }, { "epoch": 0.6951326954670204, "grad_norm": 0.057373046875, "learning_rate": 0.0009779443494891463, "loss": 1.23, "step": 7927 }, { "epoch": 0.6952203872413949, "grad_norm": 0.04833984375, "learning_rate": 0.000977590802453698, "loss": 1.1845, "step": 7928 }, { "epoch": 0.6953080790157694, "grad_norm": 0.05810546875, "learning_rate": 0.0009772373167402319, "loss": 1.2175, "step": 7929 }, { "epoch": 0.695395770790144, "grad_norm": 0.04638671875, "learning_rate": 0.0009768838923809857, "loss": 1.1854, "step": 7930 }, { "epoch": 0.6954834625645185, "grad_norm": 0.054443359375, "learning_rate": 0.0009765305294081909, "loss": 1.1343, "step": 7931 }, { "epoch": 0.695571154338893, "grad_norm": 0.050537109375, "learning_rate": 0.000976177227854074, "loss": 1.1393, "step": 7932 }, { "epoch": 0.6956588461132677, "grad_norm": 0.054443359375, "learning_rate": 0.0009758239877508537, "loss": 1.125, "step": 7933 }, { "epoch": 0.6957465378876422, "grad_norm": 0.05810546875, "learning_rate": 0.0009754708091307454, "loss": 1.1531, "step": 7934 }, { "epoch": 0.6958342296620167, "grad_norm": 0.056396484375, "learning_rate": 0.0009751176920259581, "loss": 1.1854, "step": 7935 }, { "epoch": 0.6959219214363913, "grad_norm": 0.05322265625, "learning_rate": 0.0009747646364686956, "loss": 1.1111, "step": 7936 }, { "epoch": 0.6960096132107658, "grad_norm": 0.060791015625, "learning_rate": 0.0009744116424911546, "loss": 1.1953, "step": 7937 }, { "epoch": 0.6960973049851403, "grad_norm": 0.050048828125, "learning_rate": 0.0009740587101255278, "loss": 1.1472, "step": 7938 }, { "epoch": 0.696184996759515, "grad_norm": 0.052978515625, "learning_rate": 0.000973705839404002, "loss": 1.1566, "step": 7939 }, { "epoch": 0.6962726885338895, "grad_norm": 0.0517578125, "learning_rate": 0.0009733530303587575, "loss": 1.2069, "step": 7940 }, { "epoch": 0.696360380308264, "grad_norm": 0.072265625, "learning_rate": 0.0009730002830219703, "loss": 1.2087, "step": 7941 }, { "epoch": 0.6964480720826385, "grad_norm": 0.0615234375, "learning_rate": 0.0009726475974258092, "loss": 1.1771, "step": 7942 }, { "epoch": 0.6965357638570131, "grad_norm": 0.0478515625, "learning_rate": 0.0009722949736024386, "loss": 1.1937, "step": 7943 }, { "epoch": 0.6966234556313876, "grad_norm": 0.05322265625, "learning_rate": 0.0009719424115840169, "loss": 1.1505, "step": 7944 }, { "epoch": 0.6967111474057621, "grad_norm": 0.04345703125, "learning_rate": 0.000971589911402697, "loss": 1.1011, "step": 7945 }, { "epoch": 0.6967988391801367, "grad_norm": 0.048828125, "learning_rate": 0.0009712374730906264, "loss": 1.1679, "step": 7946 }, { "epoch": 0.6968865309545113, "grad_norm": 0.052490234375, "learning_rate": 0.0009708850966799453, "loss": 1.2061, "step": 7947 }, { "epoch": 0.6969742227288858, "grad_norm": 0.056640625, "learning_rate": 0.0009705327822027904, "loss": 1.1197, "step": 7948 }, { "epoch": 0.6970619145032604, "grad_norm": 0.048828125, "learning_rate": 0.0009701805296912918, "loss": 1.1613, "step": 7949 }, { "epoch": 0.6971496062776349, "grad_norm": 0.0576171875, "learning_rate": 0.0009698283391775742, "loss": 1.1812, "step": 7950 }, { "epoch": 0.6972372980520094, "grad_norm": 0.05126953125, "learning_rate": 0.0009694762106937559, "loss": 1.1341, "step": 7951 }, { "epoch": 0.697324989826384, "grad_norm": 0.046630859375, "learning_rate": 0.0009691241442719503, "loss": 1.1685, "step": 7952 }, { "epoch": 0.6974126816007585, "grad_norm": 0.046630859375, "learning_rate": 0.0009687721399442652, "loss": 1.1364, "step": 7953 }, { "epoch": 0.697500373375133, "grad_norm": 0.0576171875, "learning_rate": 0.0009684201977428027, "loss": 1.169, "step": 7954 }, { "epoch": 0.6975880651495077, "grad_norm": 0.049560546875, "learning_rate": 0.0009680683176996585, "loss": 1.1814, "step": 7955 }, { "epoch": 0.6976757569238822, "grad_norm": 0.052978515625, "learning_rate": 0.0009677164998469235, "loss": 1.2034, "step": 7956 }, { "epoch": 0.6977634486982567, "grad_norm": 0.055419921875, "learning_rate": 0.000967364744216682, "loss": 1.2099, "step": 7957 }, { "epoch": 0.6978511404726313, "grad_norm": 0.05029296875, "learning_rate": 0.0009670130508410137, "loss": 1.1687, "step": 7958 }, { "epoch": 0.6979388322470058, "grad_norm": 0.056640625, "learning_rate": 0.0009666614197519926, "loss": 1.1183, "step": 7959 }, { "epoch": 0.6980265240213803, "grad_norm": 0.08642578125, "learning_rate": 0.0009663098509816852, "loss": 1.2061, "step": 7960 }, { "epoch": 0.6981142157957549, "grad_norm": 0.07421875, "learning_rate": 0.0009659583445621545, "loss": 1.2052, "step": 7961 }, { "epoch": 0.6982019075701295, "grad_norm": 0.0859375, "learning_rate": 0.0009656069005254571, "loss": 1.2156, "step": 7962 }, { "epoch": 0.698289599344504, "grad_norm": 0.04736328125, "learning_rate": 0.000965255518903644, "loss": 1.1155, "step": 7963 }, { "epoch": 0.6983772911188786, "grad_norm": 0.05078125, "learning_rate": 0.0009649041997287594, "loss": 1.2042, "step": 7964 }, { "epoch": 0.6984649828932531, "grad_norm": 0.053955078125, "learning_rate": 0.0009645529430328435, "loss": 1.1426, "step": 7965 }, { "epoch": 0.6985526746676276, "grad_norm": 0.045166015625, "learning_rate": 0.0009642017488479296, "loss": 1.2129, "step": 7966 }, { "epoch": 0.6986403664420021, "grad_norm": 0.053955078125, "learning_rate": 0.0009638506172060461, "loss": 1.1868, "step": 7967 }, { "epoch": 0.6987280582163767, "grad_norm": 0.048828125, "learning_rate": 0.0009634995481392147, "loss": 1.1688, "step": 7968 }, { "epoch": 0.6988157499907512, "grad_norm": 0.052978515625, "learning_rate": 0.0009631485416794522, "loss": 1.1878, "step": 7969 }, { "epoch": 0.6989034417651258, "grad_norm": 0.050048828125, "learning_rate": 0.0009627975978587698, "loss": 1.1791, "step": 7970 }, { "epoch": 0.6989911335395004, "grad_norm": 0.05126953125, "learning_rate": 0.0009624467167091724, "loss": 1.167, "step": 7971 }, { "epoch": 0.6990788253138749, "grad_norm": 0.045654296875, "learning_rate": 0.0009620958982626598, "loss": 1.1485, "step": 7972 }, { "epoch": 0.6991665170882494, "grad_norm": 0.055419921875, "learning_rate": 0.0009617451425512258, "loss": 1.1901, "step": 7973 }, { "epoch": 0.699254208862624, "grad_norm": 0.0634765625, "learning_rate": 0.0009613944496068575, "loss": 1.1332, "step": 7974 }, { "epoch": 0.6993419006369985, "grad_norm": 0.0498046875, "learning_rate": 0.0009610438194615375, "loss": 1.1564, "step": 7975 }, { "epoch": 0.699429592411373, "grad_norm": 0.052001953125, "learning_rate": 0.0009606932521472428, "loss": 1.1759, "step": 7976 }, { "epoch": 0.6995172841857477, "grad_norm": 0.055419921875, "learning_rate": 0.0009603427476959449, "loss": 1.1269, "step": 7977 }, { "epoch": 0.6996049759601222, "grad_norm": 0.049072265625, "learning_rate": 0.0009599923061396073, "loss": 1.1501, "step": 7978 }, { "epoch": 0.6996926677344967, "grad_norm": 0.06591796875, "learning_rate": 0.00095964192751019, "loss": 1.2326, "step": 7979 }, { "epoch": 0.6997803595088713, "grad_norm": 0.07470703125, "learning_rate": 0.0009592916118396468, "loss": 1.1798, "step": 7980 }, { "epoch": 0.6998680512832458, "grad_norm": 0.087890625, "learning_rate": 0.0009589413591599262, "loss": 1.1608, "step": 7981 }, { "epoch": 0.6999557430576203, "grad_norm": 0.052978515625, "learning_rate": 0.0009585911695029691, "loss": 1.1932, "step": 7982 }, { "epoch": 0.7000434348319949, "grad_norm": 0.048095703125, "learning_rate": 0.0009582410429007125, "loss": 1.1229, "step": 7983 }, { "epoch": 0.7001311266063694, "grad_norm": 0.059814453125, "learning_rate": 0.0009578909793850871, "loss": 1.1371, "step": 7984 }, { "epoch": 0.700218818380744, "grad_norm": 0.055908203125, "learning_rate": 0.0009575409789880179, "loss": 1.2343, "step": 7985 }, { "epoch": 0.7003065101551186, "grad_norm": 0.0673828125, "learning_rate": 0.0009571910417414236, "loss": 1.2118, "step": 7986 }, { "epoch": 0.7003942019294931, "grad_norm": 0.050537109375, "learning_rate": 0.0009568411676772178, "loss": 1.1926, "step": 7987 }, { "epoch": 0.7004818937038676, "grad_norm": 0.05419921875, "learning_rate": 0.000956491356827308, "loss": 1.1504, "step": 7988 }, { "epoch": 0.7005695854782422, "grad_norm": 0.047607421875, "learning_rate": 0.0009561416092235969, "loss": 1.1404, "step": 7989 }, { "epoch": 0.7006572772526167, "grad_norm": 0.05615234375, "learning_rate": 0.0009557919248979796, "loss": 1.1305, "step": 7990 }, { "epoch": 0.7007449690269912, "grad_norm": 0.08837890625, "learning_rate": 0.0009554423038823464, "loss": 1.1959, "step": 7991 }, { "epoch": 0.7008326608013657, "grad_norm": 0.04931640625, "learning_rate": 0.0009550927462085818, "loss": 1.1835, "step": 7992 }, { "epoch": 0.7009203525757404, "grad_norm": 0.056884765625, "learning_rate": 0.0009547432519085652, "loss": 1.1443, "step": 7993 }, { "epoch": 0.7010080443501149, "grad_norm": 0.048828125, "learning_rate": 0.0009543938210141695, "loss": 1.1035, "step": 7994 }, { "epoch": 0.7010957361244894, "grad_norm": 0.0517578125, "learning_rate": 0.0009540444535572611, "loss": 1.1641, "step": 7995 }, { "epoch": 0.701183427898864, "grad_norm": 0.07763671875, "learning_rate": 0.0009536951495697022, "loss": 1.2494, "step": 7996 }, { "epoch": 0.7012711196732385, "grad_norm": 0.054931640625, "learning_rate": 0.0009533459090833479, "loss": 1.205, "step": 7997 }, { "epoch": 0.701358811447613, "grad_norm": 0.04443359375, "learning_rate": 0.0009529967321300489, "loss": 1.1268, "step": 7998 }, { "epoch": 0.7014465032219876, "grad_norm": 0.06298828125, "learning_rate": 0.0009526476187416481, "loss": 1.2106, "step": 7999 }, { "epoch": 0.7015341949963622, "grad_norm": 0.0830078125, "learning_rate": 0.0009522985689499842, "loss": 1.1998, "step": 8000 }, { "epoch": 0.7015341949963622, "eval_loss": 1.1820982694625854, "eval_runtime": 429.0952, "eval_samples_per_second": 33.669, "eval_steps_per_second": 8.418, "step": 8000 } ], "logging_steps": 1.0, "max_steps": 11403, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.9590270337024e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }