{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.999383929357233, "eval_steps": 20000, "global_step": 12780, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.208333333333333e-08, "loss": 5.5827, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.0416666666666667e-07, "loss": 5.5747, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.5625e-07, "loss": 6.7174, "step": 3 }, { "epoch": 0.0, "learning_rate": 2.0833333333333333e-07, "loss": 5.7085, "step": 4 }, { "epoch": 0.0, "learning_rate": 2.604166666666667e-07, "loss": 6.6178, "step": 5 }, { "epoch": 0.0, "learning_rate": 3.125e-07, "loss": 6.3173, "step": 6 }, { "epoch": 0.0, "learning_rate": 3.6458333333333337e-07, "loss": 5.6015, "step": 7 }, { "epoch": 0.0, "learning_rate": 4.1666666666666667e-07, "loss": 5.4187, "step": 8 }, { "epoch": 0.0, "learning_rate": 4.6875000000000006e-07, "loss": 5.5212, "step": 9 }, { "epoch": 0.0, "learning_rate": 5.208333333333334e-07, "loss": 5.2638, "step": 10 }, { "epoch": 0.0, "learning_rate": 5.729166666666667e-07, "loss": 4.1186, "step": 11 }, { "epoch": 0.0, "learning_rate": 6.25e-07, "loss": 3.7916, "step": 12 }, { "epoch": 0.0, "learning_rate": 6.770833333333333e-07, "loss": 3.0251, "step": 13 }, { "epoch": 0.0, "learning_rate": 7.291666666666667e-07, "loss": 3.15, "step": 14 }, { "epoch": 0.0, "learning_rate": 7.8125e-07, "loss": 2.7565, "step": 15 }, { "epoch": 0.0, "learning_rate": 8.333333333333333e-07, "loss": 2.5518, "step": 16 }, { "epoch": 0.0, "learning_rate": 8.854166666666668e-07, "loss": 2.314, "step": 17 }, { "epoch": 0.0, "learning_rate": 9.375000000000001e-07, "loss": 1.6436, "step": 18 }, { "epoch": 0.0, "learning_rate": 9.895833333333333e-07, "loss": 1.7501, "step": 19 }, { "epoch": 0.0, "learning_rate": 1.0416666666666667e-06, "loss": 1.339, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.0937500000000001e-06, "loss": 0.9932, "step": 21 }, { "epoch": 0.01, "learning_rate": 1.1458333333333333e-06, "loss": 0.4627, "step": 22 }, { "epoch": 0.01, "learning_rate": 1.197916666666667e-06, "loss": 0.3419, "step": 23 }, { "epoch": 0.01, "learning_rate": 1.25e-06, "loss": 0.3048, "step": 24 }, { "epoch": 0.01, "learning_rate": 1.3020833333333335e-06, "loss": 0.2539, "step": 25 }, { "epoch": 0.01, "learning_rate": 1.3541666666666667e-06, "loss": 0.2766, "step": 26 }, { "epoch": 0.01, "learning_rate": 1.40625e-06, "loss": 0.2357, "step": 27 }, { "epoch": 0.01, "learning_rate": 1.4583333333333335e-06, "loss": 0.2354, "step": 28 }, { "epoch": 0.01, "learning_rate": 1.5104166666666667e-06, "loss": 0.304, "step": 29 }, { "epoch": 0.01, "learning_rate": 1.5625e-06, "loss": 0.2149, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.6145833333333335e-06, "loss": 0.3174, "step": 31 }, { "epoch": 0.01, "learning_rate": 1.6666666666666667e-06, "loss": 0.2118, "step": 32 }, { "epoch": 0.01, "learning_rate": 1.71875e-06, "loss": 0.2422, "step": 33 }, { "epoch": 0.01, "learning_rate": 1.7708333333333337e-06, "loss": 0.1679, "step": 34 }, { "epoch": 0.01, "learning_rate": 1.8229166666666666e-06, "loss": 0.2461, "step": 35 }, { "epoch": 0.01, "learning_rate": 1.8750000000000003e-06, "loss": 0.1618, "step": 36 }, { "epoch": 0.01, "learning_rate": 1.9270833333333334e-06, "loss": 0.1895, "step": 37 }, { "epoch": 0.01, "learning_rate": 1.9791666666666666e-06, "loss": 0.2213, "step": 38 }, { "epoch": 0.01, "learning_rate": 2.0312500000000002e-06, "loss": 0.2482, "step": 39 }, { "epoch": 0.01, "learning_rate": 2.0833333333333334e-06, "loss": 0.101, "step": 40 }, { "epoch": 0.01, "learning_rate": 2.1354166666666666e-06, "loss": 0.2184, "step": 41 }, { "epoch": 0.01, "learning_rate": 2.1875000000000002e-06, "loss": 0.1493, "step": 42 }, { "epoch": 0.01, "learning_rate": 2.2395833333333334e-06, "loss": 0.1748, "step": 43 }, { "epoch": 0.01, "learning_rate": 2.2916666666666666e-06, "loss": 0.2109, "step": 44 }, { "epoch": 0.01, "learning_rate": 2.3437500000000002e-06, "loss": 0.2013, "step": 45 }, { "epoch": 0.01, "learning_rate": 2.395833333333334e-06, "loss": 0.1388, "step": 46 }, { "epoch": 0.01, "learning_rate": 2.4479166666666666e-06, "loss": 0.1674, "step": 47 }, { "epoch": 0.01, "learning_rate": 2.5e-06, "loss": 0.1681, "step": 48 }, { "epoch": 0.01, "learning_rate": 2.5520833333333334e-06, "loss": 0.1293, "step": 49 }, { "epoch": 0.01, "learning_rate": 2.604166666666667e-06, "loss": 0.1604, "step": 50 }, { "epoch": 0.01, "learning_rate": 2.65625e-06, "loss": 0.1759, "step": 51 }, { "epoch": 0.01, "learning_rate": 2.7083333333333334e-06, "loss": 0.1775, "step": 52 }, { "epoch": 0.01, "learning_rate": 2.760416666666667e-06, "loss": 0.0744, "step": 53 }, { "epoch": 0.01, "learning_rate": 2.8125e-06, "loss": 0.0996, "step": 54 }, { "epoch": 0.01, "learning_rate": 2.8645833333333334e-06, "loss": 0.0436, "step": 55 }, { "epoch": 0.01, "learning_rate": 2.916666666666667e-06, "loss": 0.1715, "step": 56 }, { "epoch": 0.01, "learning_rate": 2.96875e-06, "loss": 0.0929, "step": 57 }, { "epoch": 0.01, "learning_rate": 3.0208333333333334e-06, "loss": 0.2445, "step": 58 }, { "epoch": 0.01, "learning_rate": 3.072916666666667e-06, "loss": 0.0868, "step": 59 }, { "epoch": 0.01, "learning_rate": 3.125e-06, "loss": 0.1626, "step": 60 }, { "epoch": 0.01, "learning_rate": 3.1770833333333333e-06, "loss": 0.065, "step": 61 }, { "epoch": 0.01, "learning_rate": 3.229166666666667e-06, "loss": 0.0813, "step": 62 }, { "epoch": 0.01, "learning_rate": 3.28125e-06, "loss": 0.1326, "step": 63 }, { "epoch": 0.02, "learning_rate": 3.3333333333333333e-06, "loss": 0.0751, "step": 64 }, { "epoch": 0.02, "learning_rate": 3.385416666666667e-06, "loss": 0.1917, "step": 65 }, { "epoch": 0.02, "learning_rate": 3.4375e-06, "loss": 0.1485, "step": 66 }, { "epoch": 0.02, "learning_rate": 3.4895833333333333e-06, "loss": 0.2112, "step": 67 }, { "epoch": 0.02, "learning_rate": 3.5416666666666673e-06, "loss": 0.1976, "step": 68 }, { "epoch": 0.02, "learning_rate": 3.59375e-06, "loss": 0.2298, "step": 69 }, { "epoch": 0.02, "learning_rate": 3.6458333333333333e-06, "loss": 0.2238, "step": 70 }, { "epoch": 0.02, "learning_rate": 3.6979166666666673e-06, "loss": 0.2145, "step": 71 }, { "epoch": 0.02, "learning_rate": 3.7500000000000005e-06, "loss": 0.2743, "step": 72 }, { "epoch": 0.02, "learning_rate": 3.8020833333333333e-06, "loss": 0.2348, "step": 73 }, { "epoch": 0.02, "learning_rate": 3.854166666666667e-06, "loss": 0.1418, "step": 74 }, { "epoch": 0.02, "learning_rate": 3.90625e-06, "loss": 0.1666, "step": 75 }, { "epoch": 0.02, "learning_rate": 3.958333333333333e-06, "loss": 0.2609, "step": 76 }, { "epoch": 0.02, "learning_rate": 4.010416666666667e-06, "loss": 0.2523, "step": 77 }, { "epoch": 0.02, "learning_rate": 4.0625000000000005e-06, "loss": 0.1373, "step": 78 }, { "epoch": 0.02, "learning_rate": 4.114583333333334e-06, "loss": 0.2044, "step": 79 }, { "epoch": 0.02, "learning_rate": 4.166666666666667e-06, "loss": 0.2503, "step": 80 }, { "epoch": 0.02, "learning_rate": 4.21875e-06, "loss": 0.1543, "step": 81 }, { "epoch": 0.02, "learning_rate": 4.270833333333333e-06, "loss": 0.2252, "step": 82 }, { "epoch": 0.02, "learning_rate": 4.322916666666667e-06, "loss": 0.1392, "step": 83 }, { "epoch": 0.02, "learning_rate": 4.3750000000000005e-06, "loss": 0.1164, "step": 84 }, { "epoch": 0.02, "learning_rate": 4.427083333333334e-06, "loss": 0.1739, "step": 85 }, { "epoch": 0.02, "learning_rate": 4.479166666666667e-06, "loss": 0.244, "step": 86 }, { "epoch": 0.02, "learning_rate": 4.53125e-06, "loss": 0.1899, "step": 87 }, { "epoch": 0.02, "learning_rate": 4.583333333333333e-06, "loss": 0.0731, "step": 88 }, { "epoch": 0.02, "learning_rate": 4.635416666666667e-06, "loss": 0.0614, "step": 89 }, { "epoch": 0.02, "learning_rate": 4.6875000000000004e-06, "loss": 0.0892, "step": 90 }, { "epoch": 0.02, "learning_rate": 4.739583333333334e-06, "loss": 0.114, "step": 91 }, { "epoch": 0.02, "learning_rate": 4.791666666666668e-06, "loss": 0.131, "step": 92 }, { "epoch": 0.02, "learning_rate": 4.84375e-06, "loss": 0.1445, "step": 93 }, { "epoch": 0.02, "learning_rate": 4.895833333333333e-06, "loss": 0.1978, "step": 94 }, { "epoch": 0.02, "learning_rate": 4.947916666666667e-06, "loss": 0.1754, "step": 95 }, { "epoch": 0.02, "learning_rate": 5e-06, "loss": 0.1327, "step": 96 }, { "epoch": 0.02, "learning_rate": 5.0520833333333344e-06, "loss": 0.0671, "step": 97 }, { "epoch": 0.02, "learning_rate": 5.104166666666667e-06, "loss": 0.1404, "step": 98 }, { "epoch": 0.02, "learning_rate": 5.156250000000001e-06, "loss": 0.1898, "step": 99 }, { "epoch": 0.02, "learning_rate": 5.208333333333334e-06, "loss": 0.1684, "step": 100 }, { "epoch": 0.02, "learning_rate": 5.260416666666666e-06, "loss": 0.0863, "step": 101 }, { "epoch": 0.02, "learning_rate": 5.3125e-06, "loss": 0.0621, "step": 102 }, { "epoch": 0.02, "learning_rate": 5.364583333333334e-06, "loss": 0.0937, "step": 103 }, { "epoch": 0.02, "learning_rate": 5.416666666666667e-06, "loss": 0.0648, "step": 104 }, { "epoch": 0.02, "learning_rate": 5.468750000000001e-06, "loss": 0.1064, "step": 105 }, { "epoch": 0.02, "learning_rate": 5.520833333333334e-06, "loss": 0.1584, "step": 106 }, { "epoch": 0.03, "learning_rate": 5.572916666666667e-06, "loss": 0.0883, "step": 107 }, { "epoch": 0.03, "learning_rate": 5.625e-06, "loss": 0.0515, "step": 108 }, { "epoch": 0.03, "learning_rate": 5.677083333333334e-06, "loss": 0.0584, "step": 109 }, { "epoch": 0.03, "learning_rate": 5.729166666666667e-06, "loss": 0.0968, "step": 110 }, { "epoch": 0.03, "learning_rate": 5.781250000000001e-06, "loss": 0.0917, "step": 111 }, { "epoch": 0.03, "learning_rate": 5.833333333333334e-06, "loss": 0.0807, "step": 112 }, { "epoch": 0.03, "learning_rate": 5.885416666666667e-06, "loss": 0.1357, "step": 113 }, { "epoch": 0.03, "learning_rate": 5.9375e-06, "loss": 0.0808, "step": 114 }, { "epoch": 0.03, "learning_rate": 5.989583333333334e-06, "loss": 0.1146, "step": 115 }, { "epoch": 0.03, "learning_rate": 6.041666666666667e-06, "loss": 0.2319, "step": 116 }, { "epoch": 0.03, "learning_rate": 6.093750000000001e-06, "loss": 0.1941, "step": 117 }, { "epoch": 0.03, "learning_rate": 6.145833333333334e-06, "loss": 0.1286, "step": 118 }, { "epoch": 0.03, "learning_rate": 6.197916666666667e-06, "loss": 0.0899, "step": 119 }, { "epoch": 0.03, "learning_rate": 6.25e-06, "loss": 0.0666, "step": 120 }, { "epoch": 0.03, "learning_rate": 6.302083333333334e-06, "loss": 0.111, "step": 121 }, { "epoch": 0.03, "learning_rate": 6.354166666666667e-06, "loss": 0.1658, "step": 122 }, { "epoch": 0.03, "learning_rate": 6.406250000000001e-06, "loss": 0.0912, "step": 123 }, { "epoch": 0.03, "learning_rate": 6.458333333333334e-06, "loss": 0.0847, "step": 124 }, { "epoch": 0.03, "learning_rate": 6.510416666666667e-06, "loss": 0.1125, "step": 125 }, { "epoch": 0.03, "learning_rate": 6.5625e-06, "loss": 0.0726, "step": 126 }, { "epoch": 0.03, "learning_rate": 6.614583333333334e-06, "loss": 0.1316, "step": 127 }, { "epoch": 0.03, "learning_rate": 6.666666666666667e-06, "loss": 0.0558, "step": 128 }, { "epoch": 0.03, "learning_rate": 6.718750000000001e-06, "loss": 0.1404, "step": 129 }, { "epoch": 0.03, "learning_rate": 6.770833333333334e-06, "loss": 0.1306, "step": 130 }, { "epoch": 0.03, "learning_rate": 6.822916666666667e-06, "loss": 0.0665, "step": 131 }, { "epoch": 0.03, "learning_rate": 6.875e-06, "loss": 0.1211, "step": 132 }, { "epoch": 0.03, "learning_rate": 6.927083333333334e-06, "loss": 0.1798, "step": 133 }, { "epoch": 0.03, "learning_rate": 6.979166666666667e-06, "loss": 0.114, "step": 134 }, { "epoch": 0.03, "learning_rate": 7.031250000000001e-06, "loss": 0.0978, "step": 135 }, { "epoch": 0.03, "learning_rate": 7.083333333333335e-06, "loss": 0.1675, "step": 136 }, { "epoch": 0.03, "learning_rate": 7.135416666666667e-06, "loss": 0.3907, "step": 137 }, { "epoch": 0.03, "learning_rate": 7.1875e-06, "loss": 0.1415, "step": 138 }, { "epoch": 0.03, "learning_rate": 7.239583333333334e-06, "loss": 0.1064, "step": 139 }, { "epoch": 0.03, "learning_rate": 7.291666666666667e-06, "loss": 0.0952, "step": 140 }, { "epoch": 0.03, "learning_rate": 7.343750000000001e-06, "loss": 0.0589, "step": 141 }, { "epoch": 0.03, "learning_rate": 7.395833333333335e-06, "loss": 0.0803, "step": 142 }, { "epoch": 0.03, "learning_rate": 7.447916666666667e-06, "loss": 0.0768, "step": 143 }, { "epoch": 0.03, "learning_rate": 7.500000000000001e-06, "loss": 0.0582, "step": 144 }, { "epoch": 0.03, "learning_rate": 7.552083333333334e-06, "loss": 0.0682, "step": 145 }, { "epoch": 0.03, "learning_rate": 7.6041666666666666e-06, "loss": 0.184, "step": 146 }, { "epoch": 0.03, "learning_rate": 7.656250000000001e-06, "loss": 0.0475, "step": 147 }, { "epoch": 0.03, "learning_rate": 7.708333333333334e-06, "loss": 0.0369, "step": 148 }, { "epoch": 0.03, "learning_rate": 7.760416666666666e-06, "loss": 0.2564, "step": 149 }, { "epoch": 0.04, "learning_rate": 7.8125e-06, "loss": 0.017, "step": 150 }, { "epoch": 0.04, "learning_rate": 7.864583333333334e-06, "loss": 0.1283, "step": 151 }, { "epoch": 0.04, "learning_rate": 7.916666666666667e-06, "loss": 0.0198, "step": 152 }, { "epoch": 0.04, "learning_rate": 7.96875e-06, "loss": 0.2282, "step": 153 }, { "epoch": 0.04, "learning_rate": 8.020833333333335e-06, "loss": 0.2504, "step": 154 }, { "epoch": 0.04, "learning_rate": 8.072916666666667e-06, "loss": 0.0596, "step": 155 }, { "epoch": 0.04, "learning_rate": 8.125000000000001e-06, "loss": 0.072, "step": 156 }, { "epoch": 0.04, "learning_rate": 8.177083333333335e-06, "loss": 0.146, "step": 157 }, { "epoch": 0.04, "learning_rate": 8.229166666666667e-06, "loss": 0.0991, "step": 158 }, { "epoch": 0.04, "learning_rate": 8.281250000000001e-06, "loss": 0.1902, "step": 159 }, { "epoch": 0.04, "learning_rate": 8.333333333333334e-06, "loss": 0.0586, "step": 160 }, { "epoch": 0.04, "learning_rate": 8.385416666666668e-06, "loss": 0.0997, "step": 161 }, { "epoch": 0.04, "learning_rate": 8.4375e-06, "loss": 0.0921, "step": 162 }, { "epoch": 0.04, "learning_rate": 8.489583333333334e-06, "loss": 0.0701, "step": 163 }, { "epoch": 0.04, "learning_rate": 8.541666666666666e-06, "loss": 0.0646, "step": 164 }, { "epoch": 0.04, "learning_rate": 8.59375e-06, "loss": 0.0602, "step": 165 }, { "epoch": 0.04, "learning_rate": 8.645833333333335e-06, "loss": 0.0773, "step": 166 }, { "epoch": 0.04, "learning_rate": 8.697916666666667e-06, "loss": 0.1348, "step": 167 }, { "epoch": 0.04, "learning_rate": 8.750000000000001e-06, "loss": 0.0634, "step": 168 }, { "epoch": 0.04, "learning_rate": 8.802083333333335e-06, "loss": 0.1767, "step": 169 }, { "epoch": 0.04, "learning_rate": 8.854166666666667e-06, "loss": 0.1321, "step": 170 }, { "epoch": 0.04, "learning_rate": 8.906250000000001e-06, "loss": 0.0857, "step": 171 }, { "epoch": 0.04, "learning_rate": 8.958333333333334e-06, "loss": 0.0419, "step": 172 }, { "epoch": 0.04, "learning_rate": 9.010416666666668e-06, "loss": 0.0709, "step": 173 }, { "epoch": 0.04, "learning_rate": 9.0625e-06, "loss": 0.1493, "step": 174 }, { "epoch": 0.04, "learning_rate": 9.114583333333334e-06, "loss": 0.0991, "step": 175 }, { "epoch": 0.04, "learning_rate": 9.166666666666666e-06, "loss": 0.1024, "step": 176 }, { "epoch": 0.04, "learning_rate": 9.21875e-06, "loss": 0.0885, "step": 177 }, { "epoch": 0.04, "learning_rate": 9.270833333333334e-06, "loss": 0.162, "step": 178 }, { "epoch": 0.04, "learning_rate": 9.322916666666667e-06, "loss": 0.1626, "step": 179 }, { "epoch": 0.04, "learning_rate": 9.375000000000001e-06, "loss": 0.108, "step": 180 }, { "epoch": 0.04, "learning_rate": 9.427083333333335e-06, "loss": 0.1099, "step": 181 }, { "epoch": 0.04, "learning_rate": 9.479166666666667e-06, "loss": 0.0547, "step": 182 }, { "epoch": 0.04, "learning_rate": 9.531250000000001e-06, "loss": 0.1452, "step": 183 }, { "epoch": 0.04, "learning_rate": 9.583333333333335e-06, "loss": 0.1096, "step": 184 }, { "epoch": 0.04, "learning_rate": 9.635416666666668e-06, "loss": 0.0703, "step": 185 }, { "epoch": 0.04, "learning_rate": 9.6875e-06, "loss": 0.1031, "step": 186 }, { "epoch": 0.04, "learning_rate": 9.739583333333334e-06, "loss": 0.0438, "step": 187 }, { "epoch": 0.04, "learning_rate": 9.791666666666666e-06, "loss": 0.0351, "step": 188 }, { "epoch": 0.04, "learning_rate": 9.84375e-06, "loss": 0.1236, "step": 189 }, { "epoch": 0.04, "learning_rate": 9.895833333333334e-06, "loss": 0.1133, "step": 190 }, { "epoch": 0.04, "learning_rate": 9.947916666666667e-06, "loss": 0.0673, "step": 191 }, { "epoch": 0.05, "learning_rate": 1e-05, "loss": 0.1672, "step": 192 }, { "epoch": 0.05, "learning_rate": 1.0052083333333333e-05, "loss": 0.1115, "step": 193 }, { "epoch": 0.05, "learning_rate": 1.0104166666666669e-05, "loss": 0.1317, "step": 194 }, { "epoch": 0.05, "learning_rate": 1.0156250000000001e-05, "loss": 0.0921, "step": 195 }, { "epoch": 0.05, "learning_rate": 1.0208333333333334e-05, "loss": 0.1009, "step": 196 }, { "epoch": 0.05, "learning_rate": 1.0260416666666668e-05, "loss": 0.0874, "step": 197 }, { "epoch": 0.05, "learning_rate": 1.0312500000000002e-05, "loss": 0.0906, "step": 198 }, { "epoch": 0.05, "learning_rate": 1.0364583333333334e-05, "loss": 0.1017, "step": 199 }, { "epoch": 0.05, "learning_rate": 1.0416666666666668e-05, "loss": 0.111, "step": 200 }, { "epoch": 0.05, "learning_rate": 1.046875e-05, "loss": 0.056, "step": 201 }, { "epoch": 0.05, "learning_rate": 1.0520833333333333e-05, "loss": 0.0806, "step": 202 }, { "epoch": 0.05, "learning_rate": 1.0572916666666668e-05, "loss": 0.0456, "step": 203 }, { "epoch": 0.05, "learning_rate": 1.0625e-05, "loss": 0.0916, "step": 204 }, { "epoch": 0.05, "learning_rate": 1.0677083333333333e-05, "loss": 0.0676, "step": 205 }, { "epoch": 0.05, "learning_rate": 1.0729166666666669e-05, "loss": 0.1015, "step": 206 }, { "epoch": 0.05, "learning_rate": 1.0781250000000001e-05, "loss": 0.0775, "step": 207 }, { "epoch": 0.05, "learning_rate": 1.0833333333333334e-05, "loss": 0.1192, "step": 208 }, { "epoch": 0.05, "learning_rate": 1.0885416666666668e-05, "loss": 0.1093, "step": 209 }, { "epoch": 0.05, "learning_rate": 1.0937500000000002e-05, "loss": 0.0984, "step": 210 }, { "epoch": 0.05, "learning_rate": 1.0989583333333334e-05, "loss": 0.0803, "step": 211 }, { "epoch": 0.05, "learning_rate": 1.1041666666666668e-05, "loss": 0.1861, "step": 212 }, { "epoch": 0.05, "learning_rate": 1.109375e-05, "loss": 0.272, "step": 213 }, { "epoch": 0.05, "learning_rate": 1.1145833333333334e-05, "loss": 0.1417, "step": 214 }, { "epoch": 0.05, "learning_rate": 1.1197916666666668e-05, "loss": 0.0559, "step": 215 }, { "epoch": 0.05, "learning_rate": 1.125e-05, "loss": 0.0798, "step": 216 }, { "epoch": 0.05, "learning_rate": 1.1302083333333333e-05, "loss": 0.1678, "step": 217 }, { "epoch": 0.05, "learning_rate": 1.1354166666666669e-05, "loss": 0.1081, "step": 218 }, { "epoch": 0.05, "learning_rate": 1.1406250000000001e-05, "loss": 0.0562, "step": 219 }, { "epoch": 0.05, "learning_rate": 1.1458333333333333e-05, "loss": 0.1172, "step": 220 }, { "epoch": 0.05, "learning_rate": 1.151041666666667e-05, "loss": 0.1194, "step": 221 }, { "epoch": 0.05, "learning_rate": 1.1562500000000002e-05, "loss": 0.1595, "step": 222 }, { "epoch": 0.05, "learning_rate": 1.1614583333333334e-05, "loss": 0.1615, "step": 223 }, { "epoch": 0.05, "learning_rate": 1.1666666666666668e-05, "loss": 0.1286, "step": 224 }, { "epoch": 0.05, "learning_rate": 1.171875e-05, "loss": 0.0928, "step": 225 }, { "epoch": 0.05, "learning_rate": 1.1770833333333334e-05, "loss": 0.1807, "step": 226 }, { "epoch": 0.05, "learning_rate": 1.1822916666666668e-05, "loss": 0.1135, "step": 227 }, { "epoch": 0.05, "learning_rate": 1.1875e-05, "loss": 0.0599, "step": 228 }, { "epoch": 0.05, "learning_rate": 1.1927083333333333e-05, "loss": 0.21, "step": 229 }, { "epoch": 0.05, "learning_rate": 1.1979166666666669e-05, "loss": 0.0985, "step": 230 }, { "epoch": 0.05, "learning_rate": 1.2031250000000001e-05, "loss": 0.1352, "step": 231 }, { "epoch": 0.05, "learning_rate": 1.2083333333333333e-05, "loss": 0.0529, "step": 232 }, { "epoch": 0.05, "learning_rate": 1.2135416666666669e-05, "loss": 0.0378, "step": 233 }, { "epoch": 0.05, "learning_rate": 1.2187500000000001e-05, "loss": 0.105, "step": 234 }, { "epoch": 0.06, "learning_rate": 1.2239583333333334e-05, "loss": 0.1446, "step": 235 }, { "epoch": 0.06, "learning_rate": 1.2291666666666668e-05, "loss": 0.1359, "step": 236 }, { "epoch": 0.06, "learning_rate": 1.234375e-05, "loss": 0.0829, "step": 237 }, { "epoch": 0.06, "learning_rate": 1.2395833333333334e-05, "loss": 0.1044, "step": 238 }, { "epoch": 0.06, "learning_rate": 1.2447916666666668e-05, "loss": 0.4836, "step": 239 }, { "epoch": 0.06, "learning_rate": 1.25e-05, "loss": 2.3448, "step": 240 }, { "epoch": 0.06, "learning_rate": 1.2552083333333333e-05, "loss": 1.1698, "step": 241 }, { "epoch": 0.06, "learning_rate": 1.2604166666666669e-05, "loss": 3.3319, "step": 242 }, { "epoch": 0.06, "learning_rate": 1.2656250000000001e-05, "loss": 0.8964, "step": 243 }, { "epoch": 0.06, "learning_rate": 1.2708333333333333e-05, "loss": 0.158, "step": 244 }, { "epoch": 0.06, "learning_rate": 1.2760416666666669e-05, "loss": 0.1507, "step": 245 }, { "epoch": 0.06, "learning_rate": 1.2812500000000001e-05, "loss": 0.0787, "step": 246 }, { "epoch": 0.06, "learning_rate": 1.2864583333333334e-05, "loss": 0.2234, "step": 247 }, { "epoch": 0.06, "learning_rate": 1.2916666666666668e-05, "loss": 0.2953, "step": 248 }, { "epoch": 0.06, "learning_rate": 1.2968750000000002e-05, "loss": 0.1245, "step": 249 }, { "epoch": 0.06, "learning_rate": 1.3020833333333334e-05, "loss": 0.0784, "step": 250 }, { "epoch": 0.06, "learning_rate": 1.3072916666666668e-05, "loss": 1.8571, "step": 251 }, { "epoch": 0.06, "learning_rate": 1.3125e-05, "loss": 0.2279, "step": 252 }, { "epoch": 0.06, "learning_rate": 1.3177083333333333e-05, "loss": 0.1914, "step": 253 }, { "epoch": 0.06, "learning_rate": 1.3229166666666669e-05, "loss": 0.0543, "step": 254 }, { "epoch": 0.06, "learning_rate": 1.3281250000000001e-05, "loss": 0.0607, "step": 255 }, { "epoch": 0.06, "learning_rate": 1.3333333333333333e-05, "loss": 0.1131, "step": 256 }, { "epoch": 0.06, "learning_rate": 1.3385416666666669e-05, "loss": 0.1425, "step": 257 }, { "epoch": 0.06, "learning_rate": 1.3437500000000001e-05, "loss": 0.0818, "step": 258 }, { "epoch": 0.06, "learning_rate": 1.3489583333333334e-05, "loss": 0.0523, "step": 259 }, { "epoch": 0.06, "learning_rate": 1.3541666666666668e-05, "loss": 0.0787, "step": 260 }, { "epoch": 0.06, "learning_rate": 1.3593750000000002e-05, "loss": 0.0668, "step": 261 }, { "epoch": 0.06, "learning_rate": 1.3645833333333334e-05, "loss": 0.1213, "step": 262 }, { "epoch": 0.06, "learning_rate": 1.3697916666666668e-05, "loss": 0.0697, "step": 263 }, { "epoch": 0.06, "learning_rate": 1.375e-05, "loss": 0.0738, "step": 264 }, { "epoch": 0.06, "learning_rate": 1.3802083333333335e-05, "loss": 0.121, "step": 265 }, { "epoch": 0.06, "learning_rate": 1.3854166666666669e-05, "loss": 0.2375, "step": 266 }, { "epoch": 0.06, "learning_rate": 1.3906250000000001e-05, "loss": 0.1922, "step": 267 }, { "epoch": 0.06, "learning_rate": 1.3958333333333333e-05, "loss": 0.1252, "step": 268 }, { "epoch": 0.06, "learning_rate": 1.4010416666666669e-05, "loss": 0.208, "step": 269 }, { "epoch": 0.06, "learning_rate": 1.4062500000000001e-05, "loss": 0.2078, "step": 270 }, { "epoch": 0.06, "learning_rate": 1.4114583333333334e-05, "loss": 0.1029, "step": 271 }, { "epoch": 0.06, "learning_rate": 1.416666666666667e-05, "loss": 0.3104, "step": 272 }, { "epoch": 0.06, "learning_rate": 1.4218750000000002e-05, "loss": 0.2258, "step": 273 }, { "epoch": 0.06, "learning_rate": 1.4270833333333334e-05, "loss": 0.0412, "step": 274 }, { "epoch": 0.06, "learning_rate": 1.4322916666666668e-05, "loss": 0.0329, "step": 275 }, { "epoch": 0.06, "learning_rate": 1.4375e-05, "loss": 0.1897, "step": 276 }, { "epoch": 0.07, "learning_rate": 1.4427083333333334e-05, "loss": 0.0317, "step": 277 }, { "epoch": 0.07, "learning_rate": 1.4479166666666669e-05, "loss": 0.1248, "step": 278 }, { "epoch": 0.07, "learning_rate": 1.453125e-05, "loss": 0.0932, "step": 279 }, { "epoch": 0.07, "learning_rate": 1.4583333333333333e-05, "loss": 0.0881, "step": 280 }, { "epoch": 0.07, "learning_rate": 1.4635416666666669e-05, "loss": 0.1564, "step": 281 }, { "epoch": 0.07, "learning_rate": 1.4687500000000001e-05, "loss": 0.0282, "step": 282 }, { "epoch": 0.07, "learning_rate": 1.4739583333333334e-05, "loss": 0.0979, "step": 283 }, { "epoch": 0.07, "learning_rate": 1.479166666666667e-05, "loss": 0.188, "step": 284 }, { "epoch": 0.07, "learning_rate": 1.4843750000000002e-05, "loss": 0.1747, "step": 285 }, { "epoch": 0.07, "learning_rate": 1.4895833333333334e-05, "loss": 0.288, "step": 286 }, { "epoch": 0.07, "learning_rate": 1.4947916666666668e-05, "loss": 0.2165, "step": 287 }, { "epoch": 0.07, "learning_rate": 1.5000000000000002e-05, "loss": 0.1527, "step": 288 }, { "epoch": 0.07, "learning_rate": 1.5052083333333334e-05, "loss": 0.0807, "step": 289 }, { "epoch": 0.07, "learning_rate": 1.5104166666666668e-05, "loss": 0.2492, "step": 290 }, { "epoch": 0.07, "learning_rate": 1.515625e-05, "loss": 0.0633, "step": 291 }, { "epoch": 0.07, "learning_rate": 1.5208333333333333e-05, "loss": 0.1052, "step": 292 }, { "epoch": 0.07, "learning_rate": 1.5260416666666667e-05, "loss": 0.0547, "step": 293 }, { "epoch": 0.07, "learning_rate": 1.5312500000000003e-05, "loss": 0.1048, "step": 294 }, { "epoch": 0.07, "learning_rate": 1.5364583333333335e-05, "loss": 0.0771, "step": 295 }, { "epoch": 0.07, "learning_rate": 1.5416666666666668e-05, "loss": 0.0555, "step": 296 }, { "epoch": 0.07, "learning_rate": 1.546875e-05, "loss": 0.109, "step": 297 }, { "epoch": 0.07, "learning_rate": 1.5520833333333332e-05, "loss": 0.1163, "step": 298 }, { "epoch": 0.07, "learning_rate": 1.5572916666666668e-05, "loss": 0.0389, "step": 299 }, { "epoch": 0.07, "learning_rate": 1.5625e-05, "loss": 0.1178, "step": 300 }, { "epoch": 0.07, "learning_rate": 1.5677083333333333e-05, "loss": 0.154, "step": 301 }, { "epoch": 0.07, "learning_rate": 1.572916666666667e-05, "loss": 0.0832, "step": 302 }, { "epoch": 0.07, "learning_rate": 1.578125e-05, "loss": 0.1601, "step": 303 }, { "epoch": 0.07, "learning_rate": 1.5833333333333333e-05, "loss": 0.0568, "step": 304 }, { "epoch": 0.07, "learning_rate": 1.588541666666667e-05, "loss": 0.1455, "step": 305 }, { "epoch": 0.07, "learning_rate": 1.59375e-05, "loss": 0.1472, "step": 306 }, { "epoch": 0.07, "learning_rate": 1.5989583333333333e-05, "loss": 0.0761, "step": 307 }, { "epoch": 0.07, "learning_rate": 1.604166666666667e-05, "loss": 0.0675, "step": 308 }, { "epoch": 0.07, "learning_rate": 1.609375e-05, "loss": 0.0625, "step": 309 }, { "epoch": 0.07, "learning_rate": 1.6145833333333334e-05, "loss": 0.0869, "step": 310 }, { "epoch": 0.07, "learning_rate": 1.619791666666667e-05, "loss": 0.0737, "step": 311 }, { "epoch": 0.07, "learning_rate": 1.6250000000000002e-05, "loss": 0.108, "step": 312 }, { "epoch": 0.07, "learning_rate": 1.6302083333333334e-05, "loss": 0.0821, "step": 313 }, { "epoch": 0.07, "learning_rate": 1.635416666666667e-05, "loss": 0.1442, "step": 314 }, { "epoch": 0.07, "learning_rate": 1.6406250000000002e-05, "loss": 0.0964, "step": 315 }, { "epoch": 0.07, "learning_rate": 1.6458333333333335e-05, "loss": 0.1792, "step": 316 }, { "epoch": 0.07, "learning_rate": 1.651041666666667e-05, "loss": 0.1271, "step": 317 }, { "epoch": 0.07, "learning_rate": 1.6562500000000003e-05, "loss": 0.1928, "step": 318 }, { "epoch": 0.07, "learning_rate": 1.6614583333333335e-05, "loss": 0.1059, "step": 319 }, { "epoch": 0.08, "learning_rate": 1.6666666666666667e-05, "loss": 0.2315, "step": 320 }, { "epoch": 0.08, "learning_rate": 1.671875e-05, "loss": 0.2702, "step": 321 }, { "epoch": 0.08, "learning_rate": 1.6770833333333336e-05, "loss": 0.217, "step": 322 }, { "epoch": 0.08, "learning_rate": 1.6822916666666668e-05, "loss": 0.1348, "step": 323 }, { "epoch": 0.08, "learning_rate": 1.6875e-05, "loss": 0.1122, "step": 324 }, { "epoch": 0.08, "learning_rate": 1.6927083333333333e-05, "loss": 0.1298, "step": 325 }, { "epoch": 0.08, "learning_rate": 1.6979166666666668e-05, "loss": 0.1335, "step": 326 }, { "epoch": 0.08, "learning_rate": 1.703125e-05, "loss": 0.1878, "step": 327 }, { "epoch": 0.08, "learning_rate": 1.7083333333333333e-05, "loss": 0.1094, "step": 328 }, { "epoch": 0.08, "learning_rate": 1.713541666666667e-05, "loss": 0.0477, "step": 329 }, { "epoch": 0.08, "learning_rate": 1.71875e-05, "loss": 0.1803, "step": 330 }, { "epoch": 0.08, "learning_rate": 1.7239583333333333e-05, "loss": 0.1983, "step": 331 }, { "epoch": 0.08, "learning_rate": 1.729166666666667e-05, "loss": 0.0285, "step": 332 }, { "epoch": 0.08, "learning_rate": 1.734375e-05, "loss": 0.0962, "step": 333 }, { "epoch": 0.08, "learning_rate": 1.7395833333333334e-05, "loss": 0.2628, "step": 334 }, { "epoch": 0.08, "learning_rate": 1.744791666666667e-05, "loss": 0.3071, "step": 335 }, { "epoch": 0.08, "learning_rate": 1.7500000000000002e-05, "loss": 0.1408, "step": 336 }, { "epoch": 0.08, "learning_rate": 1.7552083333333334e-05, "loss": 0.156, "step": 337 }, { "epoch": 0.08, "learning_rate": 1.760416666666667e-05, "loss": 0.0421, "step": 338 }, { "epoch": 0.08, "learning_rate": 1.7656250000000002e-05, "loss": 0.2202, "step": 339 }, { "epoch": 0.08, "learning_rate": 1.7708333333333335e-05, "loss": 0.1689, "step": 340 }, { "epoch": 0.08, "learning_rate": 1.776041666666667e-05, "loss": 0.1238, "step": 341 }, { "epoch": 0.08, "learning_rate": 1.7812500000000003e-05, "loss": 0.0209, "step": 342 }, { "epoch": 0.08, "learning_rate": 1.7864583333333335e-05, "loss": 0.0968, "step": 343 }, { "epoch": 0.08, "learning_rate": 1.7916666666666667e-05, "loss": 0.0565, "step": 344 }, { "epoch": 0.08, "learning_rate": 1.7968750000000003e-05, "loss": 0.1782, "step": 345 }, { "epoch": 0.08, "learning_rate": 1.8020833333333335e-05, "loss": 0.1007, "step": 346 }, { "epoch": 0.08, "learning_rate": 1.8072916666666668e-05, "loss": 0.1035, "step": 347 }, { "epoch": 0.08, "learning_rate": 1.8125e-05, "loss": 0.0433, "step": 348 }, { "epoch": 0.08, "learning_rate": 1.8177083333333332e-05, "loss": 0.0958, "step": 349 }, { "epoch": 0.08, "learning_rate": 1.8229166666666668e-05, "loss": 0.1082, "step": 350 }, { "epoch": 0.08, "learning_rate": 1.828125e-05, "loss": 0.119, "step": 351 }, { "epoch": 0.08, "learning_rate": 1.8333333333333333e-05, "loss": 0.0852, "step": 352 }, { "epoch": 0.08, "learning_rate": 1.838541666666667e-05, "loss": 0.1923, "step": 353 }, { "epoch": 0.08, "learning_rate": 1.84375e-05, "loss": 0.0836, "step": 354 }, { "epoch": 0.08, "learning_rate": 1.8489583333333333e-05, "loss": 0.0961, "step": 355 }, { "epoch": 0.08, "learning_rate": 1.854166666666667e-05, "loss": 0.148, "step": 356 }, { "epoch": 0.08, "learning_rate": 1.859375e-05, "loss": 0.2551, "step": 357 }, { "epoch": 0.08, "learning_rate": 1.8645833333333334e-05, "loss": 0.1332, "step": 358 }, { "epoch": 0.08, "learning_rate": 1.869791666666667e-05, "loss": 0.1429, "step": 359 }, { "epoch": 0.08, "learning_rate": 1.8750000000000002e-05, "loss": 0.108, "step": 360 }, { "epoch": 0.08, "learning_rate": 1.8802083333333334e-05, "loss": 0.1705, "step": 361 }, { "epoch": 0.08, "learning_rate": 1.885416666666667e-05, "loss": 0.0557, "step": 362 }, { "epoch": 0.09, "learning_rate": 1.8906250000000002e-05, "loss": 0.0806, "step": 363 }, { "epoch": 0.09, "learning_rate": 1.8958333333333334e-05, "loss": 0.049, "step": 364 }, { "epoch": 0.09, "learning_rate": 1.901041666666667e-05, "loss": 0.0471, "step": 365 }, { "epoch": 0.09, "learning_rate": 1.9062500000000003e-05, "loss": 0.1469, "step": 366 }, { "epoch": 0.09, "learning_rate": 1.9114583333333335e-05, "loss": 0.19, "step": 367 }, { "epoch": 0.09, "learning_rate": 1.916666666666667e-05, "loss": 0.0519, "step": 368 }, { "epoch": 0.09, "learning_rate": 1.9218750000000003e-05, "loss": 0.1001, "step": 369 }, { "epoch": 0.09, "learning_rate": 1.9270833333333335e-05, "loss": 0.1545, "step": 370 }, { "epoch": 0.09, "learning_rate": 1.9322916666666668e-05, "loss": 0.1737, "step": 371 }, { "epoch": 0.09, "learning_rate": 1.9375e-05, "loss": 0.0761, "step": 372 }, { "epoch": 0.09, "learning_rate": 1.9427083333333336e-05, "loss": 0.1807, "step": 373 }, { "epoch": 0.09, "learning_rate": 1.9479166666666668e-05, "loss": 0.1056, "step": 374 }, { "epoch": 0.09, "learning_rate": 1.953125e-05, "loss": 0.0884, "step": 375 }, { "epoch": 0.09, "learning_rate": 1.9583333333333333e-05, "loss": 0.1679, "step": 376 }, { "epoch": 0.09, "learning_rate": 1.963541666666667e-05, "loss": 0.1201, "step": 377 }, { "epoch": 0.09, "learning_rate": 1.96875e-05, "loss": 0.1342, "step": 378 }, { "epoch": 0.09, "learning_rate": 1.9739583333333333e-05, "loss": 0.2099, "step": 379 }, { "epoch": 0.09, "learning_rate": 1.979166666666667e-05, "loss": 0.128, "step": 380 }, { "epoch": 0.09, "learning_rate": 1.984375e-05, "loss": 0.1072, "step": 381 }, { "epoch": 0.09, "learning_rate": 1.9895833333333334e-05, "loss": 0.0936, "step": 382 }, { "epoch": 0.09, "learning_rate": 1.994791666666667e-05, "loss": 0.1094, "step": 383 }, { "epoch": 0.09, "learning_rate": 2e-05, "loss": 0.1073, "step": 384 }, { "epoch": 0.09, "learning_rate": 1.9999999678850973e-05, "loss": 0.0906, "step": 385 }, { "epoch": 0.09, "learning_rate": 1.999999871540391e-05, "loss": 0.069, "step": 386 }, { "epoch": 0.09, "learning_rate": 1.999999710965887e-05, "loss": 0.1175, "step": 387 }, { "epoch": 0.09, "learning_rate": 1.9999994861615963e-05, "loss": 0.0513, "step": 388 }, { "epoch": 0.09, "learning_rate": 1.9999991971275327e-05, "loss": 0.1397, "step": 389 }, { "epoch": 0.09, "learning_rate": 1.999998843863715e-05, "loss": 0.08, "step": 390 }, { "epoch": 0.09, "learning_rate": 1.999998426370166e-05, "loss": 0.0934, "step": 391 }, { "epoch": 0.09, "learning_rate": 1.9999979446469126e-05, "loss": 0.0924, "step": 392 }, { "epoch": 0.09, "learning_rate": 1.999997398693985e-05, "loss": 0.0719, "step": 393 }, { "epoch": 0.09, "learning_rate": 1.9999967885114195e-05, "loss": 0.1187, "step": 394 }, { "epoch": 0.09, "learning_rate": 1.9999961140992542e-05, "loss": 0.0493, "step": 395 }, { "epoch": 0.09, "learning_rate": 1.9999953754575326e-05, "loss": 0.0642, "step": 396 }, { "epoch": 0.09, "learning_rate": 1.999994572586303e-05, "loss": 0.1595, "step": 397 }, { "epoch": 0.09, "learning_rate": 1.9999937054856157e-05, "loss": 0.0898, "step": 398 }, { "epoch": 0.09, "learning_rate": 1.9999927741555277e-05, "loss": 0.0995, "step": 399 }, { "epoch": 0.09, "learning_rate": 1.9999917785960975e-05, "loss": 0.126, "step": 400 }, { "epoch": 0.09, "learning_rate": 1.9999907188073905e-05, "loss": 0.1283, "step": 401 }, { "epoch": 0.09, "learning_rate": 1.9999895947894735e-05, "loss": 0.1697, "step": 402 }, { "epoch": 0.09, "learning_rate": 1.9999884065424192e-05, "loss": 0.2266, "step": 403 }, { "epoch": 0.09, "learning_rate": 1.999987154066304e-05, "loss": 0.1039, "step": 404 }, { "epoch": 0.1, "learning_rate": 1.9999858373612077e-05, "loss": 0.1217, "step": 405 }, { "epoch": 0.1, "learning_rate": 1.999984456427216e-05, "loss": 0.1265, "step": 406 }, { "epoch": 0.1, "learning_rate": 1.999983011264417e-05, "loss": 0.2336, "step": 407 }, { "epoch": 0.1, "learning_rate": 1.9999815018729036e-05, "loss": 0.2527, "step": 408 }, { "epoch": 0.1, "learning_rate": 1.9999799282527723e-05, "loss": 0.1343, "step": 409 }, { "epoch": 0.1, "learning_rate": 1.9999782904041245e-05, "loss": 0.1091, "step": 410 }, { "epoch": 0.1, "learning_rate": 1.9999765883270655e-05, "loss": 0.1604, "step": 411 }, { "epoch": 0.1, "learning_rate": 1.999974822021705e-05, "loss": 0.0864, "step": 412 }, { "epoch": 0.1, "learning_rate": 1.9999729914881557e-05, "loss": 0.1114, "step": 413 }, { "epoch": 0.1, "learning_rate": 1.9999710967265356e-05, "loss": 0.0928, "step": 414 }, { "epoch": 0.1, "learning_rate": 1.999969137736966e-05, "loss": 0.0661, "step": 415 }, { "epoch": 0.1, "learning_rate": 1.9999671145195733e-05, "loss": 0.0601, "step": 416 }, { "epoch": 0.1, "learning_rate": 1.9999650270744874e-05, "loss": 0.0745, "step": 417 }, { "epoch": 0.1, "learning_rate": 1.999962875401842e-05, "loss": 0.1339, "step": 418 }, { "epoch": 0.1, "learning_rate": 1.9999606595017752e-05, "loss": 0.1877, "step": 419 }, { "epoch": 0.1, "learning_rate": 1.9999583793744296e-05, "loss": 0.0652, "step": 420 }, { "epoch": 0.1, "learning_rate": 1.999956035019952e-05, "loss": 0.1319, "step": 421 }, { "epoch": 0.1, "learning_rate": 1.9999536264384925e-05, "loss": 0.1532, "step": 422 }, { "epoch": 0.1, "learning_rate": 1.9999511536302055e-05, "loss": 0.0577, "step": 423 }, { "epoch": 0.1, "learning_rate": 1.9999486165952507e-05, "loss": 0.062, "step": 424 }, { "epoch": 0.1, "learning_rate": 1.9999460153337905e-05, "loss": 0.1109, "step": 425 }, { "epoch": 0.1, "learning_rate": 1.999943349845992e-05, "loss": 0.1518, "step": 426 }, { "epoch": 0.1, "learning_rate": 1.9999406201320263e-05, "loss": 0.1773, "step": 427 }, { "epoch": 0.1, "learning_rate": 1.999937826192069e-05, "loss": 0.3026, "step": 428 }, { "epoch": 0.1, "learning_rate": 1.9999349680262996e-05, "loss": 0.1118, "step": 429 }, { "epoch": 0.1, "learning_rate": 1.9999320456349014e-05, "loss": 0.2127, "step": 430 }, { "epoch": 0.1, "learning_rate": 1.9999290590180624e-05, "loss": 0.1207, "step": 431 }, { "epoch": 0.1, "learning_rate": 1.999926008175974e-05, "loss": 0.1164, "step": 432 }, { "epoch": 0.1, "learning_rate": 1.9999228931088326e-05, "loss": 0.1804, "step": 433 }, { "epoch": 0.1, "learning_rate": 1.9999197138168382e-05, "loss": 0.1305, "step": 434 }, { "epoch": 0.1, "learning_rate": 1.999916470300195e-05, "loss": 0.1387, "step": 435 }, { "epoch": 0.1, "learning_rate": 1.999913162559111e-05, "loss": 0.106, "step": 436 }, { "epoch": 0.1, "learning_rate": 1.9999097905937988e-05, "loss": 0.0471, "step": 437 }, { "epoch": 0.1, "learning_rate": 1.9999063544044754e-05, "loss": 0.1121, "step": 438 }, { "epoch": 0.1, "learning_rate": 1.999902853991361e-05, "loss": 0.0962, "step": 439 }, { "epoch": 0.1, "learning_rate": 1.9998992893546804e-05, "loss": 0.1324, "step": 440 }, { "epoch": 0.1, "learning_rate": 1.999895660494663e-05, "loss": 0.9082, "step": 441 }, { "epoch": 0.1, "learning_rate": 1.999891967411542e-05, "loss": 0.0969, "step": 442 }, { "epoch": 0.1, "learning_rate": 1.999888210105554e-05, "loss": 0.5965, "step": 443 }, { "epoch": 0.1, "learning_rate": 1.9998843885769404e-05, "loss": 0.1112, "step": 444 }, { "epoch": 0.1, "learning_rate": 1.9998805028259468e-05, "loss": 0.1685, "step": 445 }, { "epoch": 0.1, "learning_rate": 1.999876552852823e-05, "loss": 0.2283, "step": 446 }, { "epoch": 0.1, "learning_rate": 1.9998725386578225e-05, "loss": 0.1671, "step": 447 }, { "epoch": 0.11, "learning_rate": 1.9998684602412033e-05, "loss": 0.1682, "step": 448 }, { "epoch": 0.11, "learning_rate": 1.999864317603227e-05, "loss": 0.1299, "step": 449 }, { "epoch": 0.11, "learning_rate": 1.99986011074416e-05, "loss": 0.084, "step": 450 }, { "epoch": 0.11, "learning_rate": 1.9998558396642724e-05, "loss": 0.0516, "step": 451 }, { "epoch": 0.11, "learning_rate": 1.9998515043638385e-05, "loss": 0.0523, "step": 452 }, { "epoch": 0.11, "learning_rate": 1.9998471048431368e-05, "loss": 0.0761, "step": 453 }, { "epoch": 0.11, "learning_rate": 1.9998426411024502e-05, "loss": 0.0629, "step": 454 }, { "epoch": 0.11, "learning_rate": 1.9998381131420648e-05, "loss": 0.2894, "step": 455 }, { "epoch": 0.11, "learning_rate": 1.9998335209622715e-05, "loss": 0.0626, "step": 456 }, { "epoch": 0.11, "learning_rate": 1.9998288645633658e-05, "loss": 1.0283, "step": 457 }, { "epoch": 0.11, "learning_rate": 1.999824143945646e-05, "loss": 0.059, "step": 458 }, { "epoch": 0.11, "learning_rate": 1.999819359109416e-05, "loss": 0.1914, "step": 459 }, { "epoch": 0.11, "learning_rate": 1.999814510054983e-05, "loss": 0.1288, "step": 460 }, { "epoch": 0.11, "learning_rate": 1.9998095967826585e-05, "loss": 0.1078, "step": 461 }, { "epoch": 0.11, "learning_rate": 1.9998046192927576e-05, "loss": 0.1549, "step": 462 }, { "epoch": 0.11, "learning_rate": 1.9997995775856003e-05, "loss": 0.126, "step": 463 }, { "epoch": 0.11, "learning_rate": 1.9997944716615106e-05, "loss": 0.1685, "step": 464 }, { "epoch": 0.11, "learning_rate": 1.9997893015208163e-05, "loss": 0.0945, "step": 465 }, { "epoch": 0.11, "learning_rate": 1.9997840671638492e-05, "loss": 0.0929, "step": 466 }, { "epoch": 0.11, "learning_rate": 1.999778768590946e-05, "loss": 0.1482, "step": 467 }, { "epoch": 0.11, "learning_rate": 1.9997734058024467e-05, "loss": 0.0754, "step": 468 }, { "epoch": 0.11, "learning_rate": 1.9997679787986957e-05, "loss": 0.0754, "step": 469 }, { "epoch": 0.11, "learning_rate": 1.999762487580042e-05, "loss": 0.0781, "step": 470 }, { "epoch": 0.11, "learning_rate": 1.999756932146838e-05, "loss": 0.0551, "step": 471 }, { "epoch": 0.11, "learning_rate": 1.9997513124994404e-05, "loss": 0.0498, "step": 472 }, { "epoch": 0.11, "learning_rate": 1.9997456286382103e-05, "loss": 0.1151, "step": 473 }, { "epoch": 0.11, "learning_rate": 1.9997398805635127e-05, "loss": 0.1225, "step": 474 }, { "epoch": 0.11, "learning_rate": 1.9997340682757174e-05, "loss": 0.0602, "step": 475 }, { "epoch": 0.11, "learning_rate": 1.9997281917751966e-05, "loss": 0.1073, "step": 476 }, { "epoch": 0.11, "learning_rate": 1.9997222510623288e-05, "loss": 0.0619, "step": 477 }, { "epoch": 0.11, "learning_rate": 1.9997162461374948e-05, "loss": 0.1815, "step": 478 }, { "epoch": 0.11, "learning_rate": 1.9997101770010806e-05, "loss": 0.1334, "step": 479 }, { "epoch": 0.11, "learning_rate": 1.999704043653476e-05, "loss": 0.2104, "step": 480 }, { "epoch": 0.11, "learning_rate": 1.9996978460950747e-05, "loss": 0.0879, "step": 481 }, { "epoch": 0.11, "learning_rate": 1.999691584326276e-05, "loss": 0.1253, "step": 482 }, { "epoch": 0.11, "learning_rate": 1.99968525834748e-05, "loss": 0.1179, "step": 483 }, { "epoch": 0.11, "learning_rate": 1.9996788681590948e-05, "loss": 0.1102, "step": 484 }, { "epoch": 0.11, "learning_rate": 1.99967241376153e-05, "loss": 0.1132, "step": 485 }, { "epoch": 0.11, "learning_rate": 1.9996658951552005e-05, "loss": 0.1343, "step": 486 }, { "epoch": 0.11, "learning_rate": 1.9996593123405245e-05, "loss": 0.1071, "step": 487 }, { "epoch": 0.11, "learning_rate": 1.9996526653179254e-05, "loss": 0.0772, "step": 488 }, { "epoch": 0.11, "learning_rate": 1.9996459540878298e-05, "loss": 0.1231, "step": 489 }, { "epoch": 0.11, "learning_rate": 1.999639178650669e-05, "loss": 0.0759, "step": 490 }, { "epoch": 0.12, "learning_rate": 1.999632339006878e-05, "loss": 0.0865, "step": 491 }, { "epoch": 0.12, "learning_rate": 1.999625435156896e-05, "loss": 0.1054, "step": 492 }, { "epoch": 0.12, "learning_rate": 1.9996184671011664e-05, "loss": 0.0316, "step": 493 }, { "epoch": 0.12, "learning_rate": 1.9996114348401373e-05, "loss": 0.0707, "step": 494 }, { "epoch": 0.12, "learning_rate": 1.99960433837426e-05, "loss": 0.1272, "step": 495 }, { "epoch": 0.12, "learning_rate": 1.99959717770399e-05, "loss": 0.0451, "step": 496 }, { "epoch": 0.12, "learning_rate": 1.9995899528297878e-05, "loss": 0.1427, "step": 497 }, { "epoch": 0.12, "learning_rate": 1.999582663752117e-05, "loss": 0.0934, "step": 498 }, { "epoch": 0.12, "learning_rate": 1.9995753104714462e-05, "loss": 0.1932, "step": 499 }, { "epoch": 0.12, "learning_rate": 1.9995678929882475e-05, "loss": 0.1328, "step": 500 }, { "epoch": 0.12, "learning_rate": 1.999560411302997e-05, "loss": 0.0448, "step": 501 }, { "epoch": 0.12, "learning_rate": 1.9995528654161758e-05, "loss": 0.0365, "step": 502 }, { "epoch": 0.12, "learning_rate": 1.999545255328268e-05, "loss": 0.0537, "step": 503 }, { "epoch": 0.12, "learning_rate": 1.999537581039763e-05, "loss": 0.0612, "step": 504 }, { "epoch": 0.12, "learning_rate": 1.9995298425511534e-05, "loss": 0.0678, "step": 505 }, { "epoch": 0.12, "learning_rate": 1.999522039862936e-05, "loss": 0.1076, "step": 506 }, { "epoch": 0.12, "learning_rate": 1.9995141729756126e-05, "loss": 0.064, "step": 507 }, { "epoch": 0.12, "learning_rate": 1.9995062418896882e-05, "loss": 0.1206, "step": 508 }, { "epoch": 0.12, "learning_rate": 1.9994982466056716e-05, "loss": 0.613, "step": 509 }, { "epoch": 0.12, "learning_rate": 1.9994901871240773e-05, "loss": 0.6418, "step": 510 }, { "epoch": 0.12, "learning_rate": 1.9994820634454225e-05, "loss": 0.6883, "step": 511 }, { "epoch": 0.12, "learning_rate": 1.9994738755702288e-05, "loss": 0.1941, "step": 512 }, { "epoch": 0.12, "learning_rate": 1.9994656234990223e-05, "loss": 1.0916, "step": 513 }, { "epoch": 0.12, "learning_rate": 1.999457307232333e-05, "loss": 1.64, "step": 514 }, { "epoch": 0.12, "learning_rate": 1.9994489267706955e-05, "loss": 0.647, "step": 515 }, { "epoch": 0.12, "learning_rate": 1.9994404821146472e-05, "loss": 0.6144, "step": 516 }, { "epoch": 0.12, "learning_rate": 1.9994319732647315e-05, "loss": 0.2406, "step": 517 }, { "epoch": 0.12, "learning_rate": 1.999423400221494e-05, "loss": 0.2517, "step": 518 }, { "epoch": 0.12, "learning_rate": 1.999414762985486e-05, "loss": 0.3206, "step": 519 }, { "epoch": 0.12, "learning_rate": 1.999406061557262e-05, "loss": 0.5392, "step": 520 }, { "epoch": 0.12, "learning_rate": 1.9993972959373808e-05, "loss": 1.9625, "step": 521 }, { "epoch": 0.12, "learning_rate": 1.9993884661264054e-05, "loss": 0.3968, "step": 522 }, { "epoch": 0.12, "learning_rate": 1.999379572124903e-05, "loss": 0.1063, "step": 523 }, { "epoch": 0.12, "learning_rate": 1.999370613933445e-05, "loss": 0.2193, "step": 524 }, { "epoch": 0.12, "learning_rate": 1.9993615915526066e-05, "loss": 0.3082, "step": 525 }, { "epoch": 0.12, "learning_rate": 1.9993525049829677e-05, "loss": 0.2148, "step": 526 }, { "epoch": 0.12, "learning_rate": 1.9993433542251114e-05, "loss": 0.1961, "step": 527 }, { "epoch": 0.12, "learning_rate": 1.9993341392796255e-05, "loss": 0.1261, "step": 528 }, { "epoch": 0.12, "learning_rate": 1.9993248601471023e-05, "loss": 0.0811, "step": 529 }, { "epoch": 0.12, "learning_rate": 1.9993155168281374e-05, "loss": 0.1257, "step": 530 }, { "epoch": 0.12, "learning_rate": 1.9993061093233313e-05, "loss": 0.0906, "step": 531 }, { "epoch": 0.12, "learning_rate": 1.999296637633288e-05, "loss": 0.0809, "step": 532 }, { "epoch": 0.13, "learning_rate": 1.9992871017586156e-05, "loss": 0.2132, "step": 533 }, { "epoch": 0.13, "learning_rate": 1.999277501699927e-05, "loss": 0.0516, "step": 534 }, { "epoch": 0.13, "learning_rate": 1.999267837457839e-05, "loss": 0.1731, "step": 535 }, { "epoch": 0.13, "learning_rate": 1.9992581090329715e-05, "loss": 0.0918, "step": 536 }, { "epoch": 0.13, "learning_rate": 1.99924831642595e-05, "loss": 0.2368, "step": 537 }, { "epoch": 0.13, "learning_rate": 1.9992384596374036e-05, "loss": 0.0574, "step": 538 }, { "epoch": 0.13, "learning_rate": 1.999228538667965e-05, "loss": 0.0343, "step": 539 }, { "epoch": 0.13, "learning_rate": 1.9992185535182715e-05, "loss": 0.1046, "step": 540 }, { "epoch": 0.13, "learning_rate": 1.9992085041889645e-05, "loss": 0.0978, "step": 541 }, { "epoch": 0.13, "learning_rate": 1.999198390680689e-05, "loss": 0.0234, "step": 542 }, { "epoch": 0.13, "learning_rate": 1.9991882129940954e-05, "loss": 0.1665, "step": 543 }, { "epoch": 0.13, "learning_rate": 1.9991779711298377e-05, "loss": 0.1133, "step": 544 }, { "epoch": 0.13, "learning_rate": 1.9991676650885722e-05, "loss": 0.142, "step": 545 }, { "epoch": 0.13, "learning_rate": 1.9991572948709624e-05, "loss": 0.1173, "step": 546 }, { "epoch": 0.13, "learning_rate": 1.9991468604776733e-05, "loss": 0.1711, "step": 547 }, { "epoch": 0.13, "learning_rate": 1.9991363619093757e-05, "loss": 0.1086, "step": 548 }, { "epoch": 0.13, "learning_rate": 1.999125799166744e-05, "loss": 0.1235, "step": 549 }, { "epoch": 0.13, "learning_rate": 1.999115172250456e-05, "loss": 0.1551, "step": 550 }, { "epoch": 0.13, "learning_rate": 1.999104481161195e-05, "loss": 0.1569, "step": 551 }, { "epoch": 0.13, "learning_rate": 1.999093725899647e-05, "loss": 0.1107, "step": 552 }, { "epoch": 0.13, "learning_rate": 1.9990829064665034e-05, "loss": 0.1456, "step": 553 }, { "epoch": 0.13, "learning_rate": 1.9990720228624587e-05, "loss": 0.1257, "step": 554 }, { "epoch": 0.13, "learning_rate": 1.9990610750882123e-05, "loss": 0.1114, "step": 555 }, { "epoch": 0.13, "learning_rate": 1.9990500631444673e-05, "loss": 0.0945, "step": 556 }, { "epoch": 0.13, "learning_rate": 1.9990389870319307e-05, "loss": 0.1497, "step": 557 }, { "epoch": 0.13, "learning_rate": 1.999027846751314e-05, "loss": 0.0789, "step": 558 }, { "epoch": 0.13, "learning_rate": 1.999016642303333e-05, "loss": 0.1146, "step": 559 }, { "epoch": 0.13, "learning_rate": 1.9990053736887075e-05, "loss": 0.1092, "step": 560 }, { "epoch": 0.13, "learning_rate": 1.998994040908161e-05, "loss": 0.1161, "step": 561 }, { "epoch": 0.13, "learning_rate": 1.998982643962421e-05, "loss": 0.1392, "step": 562 }, { "epoch": 0.13, "learning_rate": 1.9989711828522197e-05, "loss": 0.0717, "step": 563 }, { "epoch": 0.13, "learning_rate": 1.998959657578294e-05, "loss": 0.0684, "step": 564 }, { "epoch": 0.13, "learning_rate": 1.9989480681413836e-05, "loss": 0.0988, "step": 565 }, { "epoch": 0.13, "learning_rate": 1.9989364145422327e-05, "loss": 0.1269, "step": 566 }, { "epoch": 0.13, "learning_rate": 1.99892469678159e-05, "loss": 0.1096, "step": 567 }, { "epoch": 0.13, "learning_rate": 1.9989129148602085e-05, "loss": 0.0611, "step": 568 }, { "epoch": 0.13, "learning_rate": 1.9989010687788444e-05, "loss": 0.0828, "step": 569 }, { "epoch": 0.13, "learning_rate": 1.998889158538259e-05, "loss": 0.1707, "step": 570 }, { "epoch": 0.13, "learning_rate": 1.9988771841392167e-05, "loss": 0.0618, "step": 571 }, { "epoch": 0.13, "learning_rate": 1.998865145582487e-05, "loss": 0.0905, "step": 572 }, { "epoch": 0.13, "learning_rate": 1.998853042868844e-05, "loss": 0.0554, "step": 573 }, { "epoch": 0.13, "learning_rate": 1.998840875999063e-05, "loss": 0.0822, "step": 574 }, { "epoch": 0.13, "learning_rate": 1.9988286449739273e-05, "loss": 0.0561, "step": 575 }, { "epoch": 0.14, "learning_rate": 1.9988163497942218e-05, "loss": 0.1362, "step": 576 }, { "epoch": 0.14, "learning_rate": 1.9988039904607358e-05, "loss": 0.1692, "step": 577 }, { "epoch": 0.14, "learning_rate": 1.9987915669742643e-05, "loss": 0.1107, "step": 578 }, { "epoch": 0.14, "learning_rate": 1.998779079335604e-05, "loss": 0.1737, "step": 579 }, { "epoch": 0.14, "learning_rate": 1.9987665275455578e-05, "loss": 0.0424, "step": 580 }, { "epoch": 0.14, "learning_rate": 1.9987539116049316e-05, "loss": 0.0568, "step": 581 }, { "epoch": 0.14, "learning_rate": 1.998741231514536e-05, "loss": 0.0239, "step": 582 }, { "epoch": 0.14, "learning_rate": 1.998728487275185e-05, "loss": 0.0657, "step": 583 }, { "epoch": 0.14, "learning_rate": 1.998715678887697e-05, "loss": 0.1243, "step": 584 }, { "epoch": 0.14, "learning_rate": 1.998702806352896e-05, "loss": 0.0989, "step": 585 }, { "epoch": 0.14, "learning_rate": 1.998689869671607e-05, "loss": 0.1237, "step": 586 }, { "epoch": 0.14, "learning_rate": 1.998676868844662e-05, "loss": 0.1728, "step": 587 }, { "epoch": 0.14, "learning_rate": 1.9986638038728955e-05, "loss": 0.1257, "step": 588 }, { "epoch": 0.14, "learning_rate": 1.9986506747571474e-05, "loss": 0.075, "step": 589 }, { "epoch": 0.14, "learning_rate": 1.9986374814982604e-05, "loss": 0.1066, "step": 590 }, { "epoch": 0.14, "learning_rate": 1.998624224097082e-05, "loss": 0.0785, "step": 591 }, { "epoch": 0.14, "learning_rate": 1.998610902554464e-05, "loss": 0.0847, "step": 592 }, { "epoch": 0.14, "learning_rate": 1.998597516871262e-05, "loss": 0.0389, "step": 593 }, { "epoch": 0.14, "learning_rate": 1.9985840670483346e-05, "loss": 0.0598, "step": 594 }, { "epoch": 0.14, "learning_rate": 1.9985705530865473e-05, "loss": 0.0952, "step": 595 }, { "epoch": 0.14, "learning_rate": 1.9985569749867676e-05, "loss": 0.048, "step": 596 }, { "epoch": 0.14, "learning_rate": 1.9985433327498672e-05, "loss": 0.0786, "step": 597 }, { "epoch": 0.14, "learning_rate": 1.9985296263767228e-05, "loss": 0.0512, "step": 598 }, { "epoch": 0.14, "learning_rate": 1.998515855868214e-05, "loss": 0.0755, "step": 599 }, { "epoch": 0.14, "learning_rate": 1.9985020212252264e-05, "loss": 0.0705, "step": 600 }, { "epoch": 0.14, "learning_rate": 1.998488122448648e-05, "loss": 0.0855, "step": 601 }, { "epoch": 0.14, "learning_rate": 1.9984741595393713e-05, "loss": 0.1437, "step": 602 }, { "epoch": 0.14, "learning_rate": 1.9984601324982935e-05, "loss": 0.2492, "step": 603 }, { "epoch": 0.14, "learning_rate": 1.9984460413263152e-05, "loss": 0.2175, "step": 604 }, { "epoch": 0.14, "learning_rate": 1.9984318860243422e-05, "loss": 0.136, "step": 605 }, { "epoch": 0.14, "learning_rate": 1.998417666593283e-05, "loss": 0.652, "step": 606 }, { "epoch": 0.14, "learning_rate": 1.998403383034051e-05, "loss": 0.1113, "step": 607 }, { "epoch": 0.14, "learning_rate": 1.9983890353475637e-05, "loss": 0.0664, "step": 608 }, { "epoch": 0.14, "learning_rate": 1.998374623534743e-05, "loss": 0.1534, "step": 609 }, { "epoch": 0.14, "learning_rate": 1.998360147596514e-05, "loss": 0.1177, "step": 610 }, { "epoch": 0.14, "learning_rate": 1.9983456075338067e-05, "loss": 0.0826, "step": 611 }, { "epoch": 0.14, "learning_rate": 1.998331003347555e-05, "loss": 0.1112, "step": 612 }, { "epoch": 0.14, "learning_rate": 1.9983163350386974e-05, "loss": 0.0795, "step": 613 }, { "epoch": 0.14, "learning_rate": 1.9983016026081752e-05, "loss": 0.1089, "step": 614 }, { "epoch": 0.14, "learning_rate": 1.9982868060569355e-05, "loss": 0.066, "step": 615 }, { "epoch": 0.14, "learning_rate": 1.9982719453859277e-05, "loss": 0.0869, "step": 616 }, { "epoch": 0.14, "learning_rate": 1.9982570205961074e-05, "loss": 0.0773, "step": 617 }, { "epoch": 0.15, "learning_rate": 1.9982420316884324e-05, "loss": 0.1264, "step": 618 }, { "epoch": 0.15, "learning_rate": 1.9982269786638656e-05, "loss": 0.122, "step": 619 }, { "epoch": 0.15, "learning_rate": 1.998211861523374e-05, "loss": 0.0762, "step": 620 }, { "epoch": 0.15, "learning_rate": 1.998196680267929e-05, "loss": 0.0655, "step": 621 }, { "epoch": 0.15, "learning_rate": 1.9981814348985046e-05, "loss": 1.2095, "step": 622 }, { "epoch": 0.15, "learning_rate": 1.998166125416081e-05, "loss": 0.1221, "step": 623 }, { "epoch": 0.15, "learning_rate": 1.998150751821641e-05, "loss": 0.1309, "step": 624 }, { "epoch": 0.15, "learning_rate": 1.9981353141161723e-05, "loss": 0.114, "step": 625 }, { "epoch": 0.15, "learning_rate": 1.9981198123006662e-05, "loss": 0.1356, "step": 626 }, { "epoch": 0.15, "learning_rate": 1.9981042463761186e-05, "loss": 0.0896, "step": 627 }, { "epoch": 0.15, "learning_rate": 1.9980886163435294e-05, "loss": 0.0912, "step": 628 }, { "epoch": 0.15, "learning_rate": 1.998072922203902e-05, "loss": 0.2219, "step": 629 }, { "epoch": 0.15, "learning_rate": 1.9980571639582453e-05, "loss": 0.1388, "step": 630 }, { "epoch": 0.15, "learning_rate": 1.9980413416075704e-05, "loss": 0.1026, "step": 631 }, { "epoch": 0.15, "learning_rate": 1.9980254551528942e-05, "loss": 0.2136, "step": 632 }, { "epoch": 0.15, "learning_rate": 1.9980095045952373e-05, "loss": 0.2008, "step": 633 }, { "epoch": 0.15, "learning_rate": 1.9979934899356237e-05, "loss": 0.1212, "step": 634 }, { "epoch": 0.15, "learning_rate": 1.9979774111750822e-05, "loss": 0.121, "step": 635 }, { "epoch": 0.15, "learning_rate": 1.9979612683146455e-05, "loss": 0.0862, "step": 636 }, { "epoch": 0.15, "learning_rate": 1.9979450613553503e-05, "loss": 0.166, "step": 637 }, { "epoch": 0.15, "learning_rate": 1.9979287902982382e-05, "loss": 0.092, "step": 638 }, { "epoch": 0.15, "learning_rate": 1.9979124551443533e-05, "loss": 0.1416, "step": 639 }, { "epoch": 0.15, "learning_rate": 1.9978960558947457e-05, "loss": 0.1481, "step": 640 }, { "epoch": 0.15, "learning_rate": 1.9978795925504684e-05, "loss": 0.0905, "step": 641 }, { "epoch": 0.15, "learning_rate": 1.9978630651125788e-05, "loss": 0.0866, "step": 642 }, { "epoch": 0.15, "learning_rate": 1.997846473582138e-05, "loss": 0.0957, "step": 643 }, { "epoch": 0.15, "learning_rate": 1.9978298179602128e-05, "loss": 0.0647, "step": 644 }, { "epoch": 0.15, "learning_rate": 1.997813098247872e-05, "loss": 0.0747, "step": 645 }, { "epoch": 0.15, "learning_rate": 1.99779631444619e-05, "loss": 0.1309, "step": 646 }, { "epoch": 0.15, "learning_rate": 1.9977794665562444e-05, "loss": 0.1041, "step": 647 }, { "epoch": 0.15, "learning_rate": 1.9977625545791173e-05, "loss": 0.0729, "step": 648 }, { "epoch": 0.15, "learning_rate": 1.997745578515896e-05, "loss": 0.0484, "step": 649 }, { "epoch": 0.15, "learning_rate": 1.9977285383676697e-05, "loss": 0.0427, "step": 650 }, { "epoch": 0.15, "learning_rate": 1.9977114341355332e-05, "loss": 0.1231, "step": 651 }, { "epoch": 0.15, "learning_rate": 1.997694265820585e-05, "loss": 0.1009, "step": 652 }, { "epoch": 0.15, "learning_rate": 1.9976770334239285e-05, "loss": 0.1276, "step": 653 }, { "epoch": 0.15, "learning_rate": 1.9976597369466698e-05, "loss": 0.0494, "step": 654 }, { "epoch": 0.15, "learning_rate": 1.99764237638992e-05, "loss": 0.0451, "step": 655 }, { "epoch": 0.15, "learning_rate": 1.9976249517547943e-05, "loss": 0.1671, "step": 656 }, { "epoch": 0.15, "learning_rate": 1.997607463042412e-05, "loss": 0.1822, "step": 657 }, { "epoch": 0.15, "learning_rate": 1.997589910253896e-05, "loss": 0.1046, "step": 658 }, { "epoch": 0.15, "learning_rate": 1.997572293390374e-05, "loss": 0.1085, "step": 659 }, { "epoch": 0.15, "learning_rate": 1.9975546124529776e-05, "loss": 0.156, "step": 660 }, { "epoch": 0.16, "learning_rate": 1.997536867442842e-05, "loss": 0.1323, "step": 661 }, { "epoch": 0.16, "learning_rate": 1.997519058361108e-05, "loss": 0.0956, "step": 662 }, { "epoch": 0.16, "learning_rate": 1.997501185208918e-05, "loss": 0.1394, "step": 663 }, { "epoch": 0.16, "learning_rate": 1.997483247987421e-05, "loss": 0.1292, "step": 664 }, { "epoch": 0.16, "learning_rate": 1.9974652466977686e-05, "loss": 0.0916, "step": 665 }, { "epoch": 0.16, "learning_rate": 1.9974471813411174e-05, "loss": 0.0479, "step": 666 }, { "epoch": 0.16, "learning_rate": 1.997429051918628e-05, "loss": 0.149, "step": 667 }, { "epoch": 0.16, "learning_rate": 1.997410858431464e-05, "loss": 0.0751, "step": 668 }, { "epoch": 0.16, "learning_rate": 1.9973926008807946e-05, "loss": 0.0696, "step": 669 }, { "epoch": 0.16, "learning_rate": 1.997374279267792e-05, "loss": 0.1897, "step": 670 }, { "epoch": 0.16, "learning_rate": 1.9973558935936336e-05, "loss": 0.1429, "step": 671 }, { "epoch": 0.16, "learning_rate": 1.9973374438594998e-05, "loss": 0.1694, "step": 672 }, { "epoch": 0.16, "learning_rate": 1.997318930066576e-05, "loss": 0.0476, "step": 673 }, { "epoch": 0.16, "learning_rate": 1.997300352216051e-05, "loss": 0.0845, "step": 674 }, { "epoch": 0.16, "learning_rate": 1.9972817103091183e-05, "loss": 0.0971, "step": 675 }, { "epoch": 0.16, "learning_rate": 1.997263004346975e-05, "loss": 0.0817, "step": 676 }, { "epoch": 0.16, "learning_rate": 1.9972442343308227e-05, "loss": 0.0728, "step": 677 }, { "epoch": 0.16, "learning_rate": 1.9972254002618673e-05, "loss": 0.0752, "step": 678 }, { "epoch": 0.16, "learning_rate": 1.997206502141318e-05, "loss": 0.139, "step": 679 }, { "epoch": 0.16, "learning_rate": 1.9971875399703893e-05, "loss": 0.0744, "step": 680 }, { "epoch": 0.16, "learning_rate": 1.9971685137502985e-05, "loss": 0.124, "step": 681 }, { "epoch": 0.16, "learning_rate": 1.9971494234822673e-05, "loss": 0.2626, "step": 682 }, { "epoch": 0.16, "learning_rate": 1.997130269167523e-05, "loss": 0.3538, "step": 683 }, { "epoch": 0.16, "learning_rate": 1.9971110508072954e-05, "loss": 0.149, "step": 684 }, { "epoch": 0.16, "learning_rate": 1.9970917684028184e-05, "loss": 0.0877, "step": 685 }, { "epoch": 0.16, "learning_rate": 1.997072421955331e-05, "loss": 0.083, "step": 686 }, { "epoch": 0.16, "learning_rate": 1.997053011466076e-05, "loss": 0.1042, "step": 687 }, { "epoch": 0.16, "learning_rate": 1.9970335369362998e-05, "loss": 0.0379, "step": 688 }, { "epoch": 0.16, "learning_rate": 1.997013998367253e-05, "loss": 0.0471, "step": 689 }, { "epoch": 0.16, "learning_rate": 1.996994395760191e-05, "loss": 0.1063, "step": 690 }, { "epoch": 0.16, "learning_rate": 1.9969747291163725e-05, "loss": 0.1227, "step": 691 }, { "epoch": 0.16, "learning_rate": 1.996954998437061e-05, "loss": 0.0853, "step": 692 }, { "epoch": 0.16, "learning_rate": 1.9969352037235237e-05, "loss": 0.0854, "step": 693 }, { "epoch": 0.16, "learning_rate": 1.9969153449770324e-05, "loss": 0.0404, "step": 694 }, { "epoch": 0.16, "learning_rate": 1.996895422198862e-05, "loss": 0.0618, "step": 695 }, { "epoch": 0.16, "learning_rate": 1.996875435390292e-05, "loss": 0.0653, "step": 696 }, { "epoch": 0.16, "learning_rate": 1.996855384552607e-05, "loss": 0.039, "step": 697 }, { "epoch": 0.16, "learning_rate": 1.9968352696870945e-05, "loss": 0.0749, "step": 698 }, { "epoch": 0.16, "learning_rate": 1.9968150907950462e-05, "loss": 0.1514, "step": 699 }, { "epoch": 0.16, "learning_rate": 1.9967948478777578e-05, "loss": 0.0826, "step": 700 }, { "epoch": 0.16, "learning_rate": 1.9967745409365306e-05, "loss": 0.0962, "step": 701 }, { "epoch": 0.16, "learning_rate": 1.996754169972668e-05, "loss": 0.1143, "step": 702 }, { "epoch": 0.16, "learning_rate": 1.9967337349874795e-05, "loss": 0.0469, "step": 703 }, { "epoch": 0.17, "learning_rate": 1.9967132359822762e-05, "loss": 0.0855, "step": 704 }, { "epoch": 0.17, "learning_rate": 1.9966926729583757e-05, "loss": 0.097, "step": 705 }, { "epoch": 0.17, "learning_rate": 1.996672045917099e-05, "loss": 0.0293, "step": 706 }, { "epoch": 0.17, "learning_rate": 1.99665135485977e-05, "loss": 0.0706, "step": 707 }, { "epoch": 0.17, "learning_rate": 1.996630599787718e-05, "loss": 0.0825, "step": 708 }, { "epoch": 0.17, "learning_rate": 1.9966097807022764e-05, "loss": 0.062, "step": 709 }, { "epoch": 0.17, "learning_rate": 1.9965888976047824e-05, "loss": 0.0777, "step": 710 }, { "epoch": 0.17, "learning_rate": 1.996567950496577e-05, "loss": 0.1704, "step": 711 }, { "epoch": 0.17, "learning_rate": 1.9965469393790058e-05, "loss": 0.0614, "step": 712 }, { "epoch": 0.17, "learning_rate": 1.9965258642534188e-05, "loss": 0.092, "step": 713 }, { "epoch": 0.17, "learning_rate": 1.9965047251211688e-05, "loss": 0.1802, "step": 714 }, { "epoch": 0.17, "learning_rate": 1.996483521983614e-05, "loss": 0.1172, "step": 715 }, { "epoch": 0.17, "learning_rate": 1.996462254842116e-05, "loss": 0.1462, "step": 716 }, { "epoch": 0.17, "learning_rate": 1.9964409236980414e-05, "loss": 0.1099, "step": 717 }, { "epoch": 0.17, "learning_rate": 1.9964195285527604e-05, "loss": 0.078, "step": 718 }, { "epoch": 0.17, "learning_rate": 1.9963980694076462e-05, "loss": 0.0436, "step": 719 }, { "epoch": 0.17, "learning_rate": 1.9963765462640774e-05, "loss": 0.0839, "step": 720 }, { "epoch": 0.17, "learning_rate": 1.996354959123437e-05, "loss": 0.0598, "step": 721 }, { "epoch": 0.17, "learning_rate": 1.996333307987111e-05, "loss": 0.1645, "step": 722 }, { "epoch": 0.17, "learning_rate": 1.9963115928564906e-05, "loss": 0.1288, "step": 723 }, { "epoch": 0.17, "learning_rate": 1.9962898137329702e-05, "loss": 0.0752, "step": 724 }, { "epoch": 0.17, "learning_rate": 1.9962679706179483e-05, "loss": 0.0383, "step": 725 }, { "epoch": 0.17, "learning_rate": 1.9962460635128283e-05, "loss": 0.083, "step": 726 }, { "epoch": 0.17, "learning_rate": 1.9962240924190177e-05, "loss": 0.0517, "step": 727 }, { "epoch": 0.17, "learning_rate": 1.996202057337927e-05, "loss": 0.1599, "step": 728 }, { "epoch": 0.17, "learning_rate": 1.996179958270972e-05, "loss": 0.0694, "step": 729 }, { "epoch": 0.17, "learning_rate": 1.9961577952195716e-05, "loss": 0.1277, "step": 730 }, { "epoch": 0.17, "learning_rate": 1.9961355681851496e-05, "loss": 0.278, "step": 731 }, { "epoch": 0.17, "learning_rate": 1.9961132771691334e-05, "loss": 0.1196, "step": 732 }, { "epoch": 0.17, "learning_rate": 1.9960909221729555e-05, "loss": 0.1914, "step": 733 }, { "epoch": 0.17, "learning_rate": 1.996068503198051e-05, "loss": 0.0937, "step": 734 }, { "epoch": 0.17, "learning_rate": 1.99604602024586e-05, "loss": 0.1324, "step": 735 }, { "epoch": 0.17, "learning_rate": 1.996023473317827e-05, "loss": 0.0753, "step": 736 }, { "epoch": 0.17, "learning_rate": 1.9960008624154e-05, "loss": 0.0988, "step": 737 }, { "epoch": 0.17, "learning_rate": 1.9959781875400307e-05, "loss": 0.043, "step": 738 }, { "epoch": 0.17, "learning_rate": 1.9959554486931766e-05, "loss": 0.1069, "step": 739 }, { "epoch": 0.17, "learning_rate": 1.995932645876297e-05, "loss": 0.0542, "step": 740 }, { "epoch": 0.17, "learning_rate": 1.9959097790908573e-05, "loss": 0.0505, "step": 741 }, { "epoch": 0.17, "learning_rate": 1.995886848338326e-05, "loss": 0.0633, "step": 742 }, { "epoch": 0.17, "learning_rate": 1.995863853620176e-05, "loss": 0.0957, "step": 743 }, { "epoch": 0.17, "learning_rate": 1.9958407949378845e-05, "loss": 0.1676, "step": 744 }, { "epoch": 0.17, "learning_rate": 1.9958176722929318e-05, "loss": 0.0662, "step": 745 }, { "epoch": 0.18, "learning_rate": 1.9957944856868038e-05, "loss": 0.0619, "step": 746 }, { "epoch": 0.18, "learning_rate": 1.9957712351209896e-05, "loss": 0.1227, "step": 747 }, { "epoch": 0.18, "learning_rate": 1.9957479205969826e-05, "loss": 0.1283, "step": 748 }, { "epoch": 0.18, "learning_rate": 1.99572454211628e-05, "loss": 0.0564, "step": 749 }, { "epoch": 0.18, "learning_rate": 1.9957010996803835e-05, "loss": 0.0988, "step": 750 }, { "epoch": 0.18, "learning_rate": 1.9956775932907987e-05, "loss": 0.118, "step": 751 }, { "epoch": 0.18, "learning_rate": 1.995654022949036e-05, "loss": 0.0504, "step": 752 }, { "epoch": 0.18, "learning_rate": 1.9956303886566088e-05, "loss": 0.1262, "step": 753 }, { "epoch": 0.18, "learning_rate": 1.995606690415035e-05, "loss": 0.1252, "step": 754 }, { "epoch": 0.18, "learning_rate": 1.9955829282258374e-05, "loss": 0.0982, "step": 755 }, { "epoch": 0.18, "learning_rate": 1.995559102090541e-05, "loss": 0.0905, "step": 756 }, { "epoch": 0.18, "learning_rate": 1.995535212010678e-05, "loss": 0.0511, "step": 757 }, { "epoch": 0.18, "learning_rate": 1.995511257987781e-05, "loss": 0.0716, "step": 758 }, { "epoch": 0.18, "learning_rate": 1.9954872400233897e-05, "loss": 0.1189, "step": 759 }, { "epoch": 0.18, "learning_rate": 1.9954631581190466e-05, "loss": 0.1122, "step": 760 }, { "epoch": 0.18, "learning_rate": 1.995439012276298e-05, "loss": 0.0349, "step": 761 }, { "epoch": 0.18, "learning_rate": 1.9954148024966954e-05, "loss": 0.1044, "step": 762 }, { "epoch": 0.18, "learning_rate": 1.9953905287817932e-05, "loss": 0.0733, "step": 763 }, { "epoch": 0.18, "learning_rate": 1.995366191133151e-05, "loss": 0.0758, "step": 764 }, { "epoch": 0.18, "learning_rate": 1.9953417895523314e-05, "loss": 0.058, "step": 765 }, { "epoch": 0.18, "learning_rate": 1.9953173240409028e-05, "loss": 0.1297, "step": 766 }, { "epoch": 0.18, "learning_rate": 1.9952927946004353e-05, "loss": 0.1844, "step": 767 }, { "epoch": 0.18, "learning_rate": 1.995268201232505e-05, "loss": 0.0655, "step": 768 }, { "epoch": 0.18, "learning_rate": 1.995243543938692e-05, "loss": 0.1063, "step": 769 }, { "epoch": 0.18, "learning_rate": 1.9952188227205796e-05, "loss": 0.1078, "step": 770 }, { "epoch": 0.18, "learning_rate": 1.9951940375797555e-05, "loss": 0.0926, "step": 771 }, { "epoch": 0.18, "learning_rate": 1.9951691885178113e-05, "loss": 0.0816, "step": 772 }, { "epoch": 0.18, "learning_rate": 1.9951442755363445e-05, "loss": 0.0729, "step": 773 }, { "epoch": 0.18, "learning_rate": 1.9951192986369533e-05, "loss": 0.0479, "step": 774 }, { "epoch": 0.18, "learning_rate": 1.9950942578212436e-05, "loss": 0.0722, "step": 775 }, { "epoch": 0.18, "learning_rate": 1.9950691530908228e-05, "loss": 0.0483, "step": 776 }, { "epoch": 0.18, "learning_rate": 1.995043984447304e-05, "loss": 0.1047, "step": 777 }, { "epoch": 0.18, "learning_rate": 1.995018751892303e-05, "loss": 0.0812, "step": 778 }, { "epoch": 0.18, "learning_rate": 1.9949934554274413e-05, "loss": 0.0518, "step": 779 }, { "epoch": 0.18, "learning_rate": 1.9949680950543435e-05, "loss": 0.089, "step": 780 }, { "epoch": 0.18, "learning_rate": 1.994942670774638e-05, "loss": 0.083, "step": 781 }, { "epoch": 0.18, "learning_rate": 1.994917182589958e-05, "loss": 0.2014, "step": 782 }, { "epoch": 0.18, "learning_rate": 1.994891630501941e-05, "loss": 0.0701, "step": 783 }, { "epoch": 0.18, "learning_rate": 1.9948660145122275e-05, "loss": 0.1107, "step": 784 }, { "epoch": 0.18, "learning_rate": 1.9948403346224638e-05, "loss": 0.0488, "step": 785 }, { "epoch": 0.18, "learning_rate": 1.9948145908342984e-05, "loss": 0.148, "step": 786 }, { "epoch": 0.18, "learning_rate": 1.9947887831493854e-05, "loss": 0.0685, "step": 787 }, { "epoch": 0.18, "learning_rate": 1.994762911569382e-05, "loss": 0.0544, "step": 788 }, { "epoch": 0.19, "learning_rate": 1.9947369760959502e-05, "loss": 0.1769, "step": 789 }, { "epoch": 0.19, "learning_rate": 1.9947109767307555e-05, "loss": 0.0752, "step": 790 }, { "epoch": 0.19, "learning_rate": 1.9946849134754686e-05, "loss": 0.0872, "step": 791 }, { "epoch": 0.19, "learning_rate": 1.9946587863317624e-05, "loss": 0.0744, "step": 792 }, { "epoch": 0.19, "learning_rate": 1.994632595301316e-05, "loss": 0.0663, "step": 793 }, { "epoch": 0.19, "learning_rate": 1.9946063403858114e-05, "loss": 0.1519, "step": 794 }, { "epoch": 0.19, "learning_rate": 1.9945800215869346e-05, "loss": 0.0626, "step": 795 }, { "epoch": 0.19, "learning_rate": 1.9945536389063767e-05, "loss": 0.1107, "step": 796 }, { "epoch": 0.19, "learning_rate": 1.9945271923458315e-05, "loss": 0.0579, "step": 797 }, { "epoch": 0.19, "learning_rate": 1.994500681906998e-05, "loss": 0.1402, "step": 798 }, { "epoch": 0.19, "learning_rate": 1.9944741075915793e-05, "loss": 0.1044, "step": 799 }, { "epoch": 0.19, "learning_rate": 1.9944474694012817e-05, "loss": 0.1127, "step": 800 }, { "epoch": 0.19, "learning_rate": 1.9944207673378163e-05, "loss": 0.1117, "step": 801 }, { "epoch": 0.19, "learning_rate": 1.9943940014028987e-05, "loss": 0.0461, "step": 802 }, { "epoch": 0.19, "learning_rate": 1.9943671715982474e-05, "loss": 0.0404, "step": 803 }, { "epoch": 0.19, "learning_rate": 1.994340277925586e-05, "loss": 0.0831, "step": 804 }, { "epoch": 0.19, "learning_rate": 1.9943133203866416e-05, "loss": 0.0762, "step": 805 }, { "epoch": 0.19, "learning_rate": 1.9942862989831458e-05, "loss": 0.2165, "step": 806 }, { "epoch": 0.19, "learning_rate": 1.9942592137168345e-05, "loss": 0.1435, "step": 807 }, { "epoch": 0.19, "learning_rate": 1.994232064589447e-05, "loss": 0.0409, "step": 808 }, { "epoch": 0.19, "learning_rate": 1.9942048516027275e-05, "loss": 0.0329, "step": 809 }, { "epoch": 0.19, "learning_rate": 1.9941775747584233e-05, "loss": 0.1527, "step": 810 }, { "epoch": 0.19, "learning_rate": 1.994150234058287e-05, "loss": 0.0293, "step": 811 }, { "epoch": 0.19, "learning_rate": 1.994122829504074e-05, "loss": 0.0583, "step": 812 }, { "epoch": 0.19, "learning_rate": 1.994095361097545e-05, "loss": 0.0509, "step": 813 }, { "epoch": 0.19, "learning_rate": 1.9940678288404642e-05, "loss": 0.0466, "step": 814 }, { "epoch": 0.19, "learning_rate": 1.9940402327345998e-05, "loss": 0.0653, "step": 815 }, { "epoch": 0.19, "learning_rate": 1.9940125727817245e-05, "loss": 0.0384, "step": 816 }, { "epoch": 0.19, "learning_rate": 1.9939848489836148e-05, "loss": 0.0962, "step": 817 }, { "epoch": 0.19, "learning_rate": 1.9939570613420514e-05, "loss": 0.0275, "step": 818 }, { "epoch": 0.19, "learning_rate": 1.9939292098588193e-05, "loss": 0.066, "step": 819 }, { "epoch": 0.19, "learning_rate": 1.9939012945357073e-05, "loss": 0.1225, "step": 820 }, { "epoch": 0.19, "learning_rate": 1.9938733153745082e-05, "loss": 0.0731, "step": 821 }, { "epoch": 0.19, "learning_rate": 1.9938452723770193e-05, "loss": 0.0545, "step": 822 }, { "epoch": 0.19, "learning_rate": 1.9938171655450414e-05, "loss": 0.0562, "step": 823 }, { "epoch": 0.19, "learning_rate": 1.9937889948803804e-05, "loss": 0.0945, "step": 824 }, { "epoch": 0.19, "learning_rate": 1.9937607603848453e-05, "loss": 0.1192, "step": 825 }, { "epoch": 0.19, "learning_rate": 1.9937324620602496e-05, "loss": 0.1501, "step": 826 }, { "epoch": 0.19, "learning_rate": 1.993704099908411e-05, "loss": 0.1506, "step": 827 }, { "epoch": 0.19, "learning_rate": 1.9936756739311513e-05, "loss": 0.1699, "step": 828 }, { "epoch": 0.19, "learning_rate": 1.993647184130296e-05, "loss": 0.1531, "step": 829 }, { "epoch": 0.19, "learning_rate": 1.9936186305076753e-05, "loss": 0.1802, "step": 830 }, { "epoch": 0.2, "learning_rate": 1.993590013065123e-05, "loss": 0.1187, "step": 831 }, { "epoch": 0.2, "learning_rate": 1.9935613318044776e-05, "loss": 0.1247, "step": 832 }, { "epoch": 0.2, "learning_rate": 1.9935325867275805e-05, "loss": 0.0985, "step": 833 }, { "epoch": 0.2, "learning_rate": 1.9935037778362786e-05, "loss": 0.1005, "step": 834 }, { "epoch": 0.2, "learning_rate": 1.9934749051324225e-05, "loss": 0.0885, "step": 835 }, { "epoch": 0.2, "learning_rate": 1.9934459686178658e-05, "loss": 0.0868, "step": 836 }, { "epoch": 0.2, "learning_rate": 1.9934169682944676e-05, "loss": 0.186, "step": 837 }, { "epoch": 0.2, "learning_rate": 1.993387904164091e-05, "loss": 0.1544, "step": 838 }, { "epoch": 0.2, "learning_rate": 1.9933587762286023e-05, "loss": 0.0802, "step": 839 }, { "epoch": 0.2, "learning_rate": 1.9933295844898723e-05, "loss": 0.0607, "step": 840 }, { "epoch": 0.2, "learning_rate": 1.993300328949776e-05, "loss": 0.0576, "step": 841 }, { "epoch": 0.2, "learning_rate": 1.993271009610193e-05, "loss": 0.1888, "step": 842 }, { "epoch": 0.2, "learning_rate": 1.9932416264730057e-05, "loss": 0.0675, "step": 843 }, { "epoch": 0.2, "learning_rate": 1.993212179540102e-05, "loss": 0.0853, "step": 844 }, { "epoch": 0.2, "learning_rate": 1.993182668813373e-05, "loss": 0.0583, "step": 845 }, { "epoch": 0.2, "learning_rate": 1.993153094294714e-05, "loss": 0.0757, "step": 846 }, { "epoch": 0.2, "learning_rate": 1.993123455986025e-05, "loss": 0.1159, "step": 847 }, { "epoch": 0.2, "learning_rate": 1.9930937538892096e-05, "loss": 0.0613, "step": 848 }, { "epoch": 0.2, "learning_rate": 1.9930639880061752e-05, "loss": 0.033, "step": 849 }, { "epoch": 0.2, "learning_rate": 1.9930341583388335e-05, "loss": 0.0965, "step": 850 }, { "epoch": 0.2, "learning_rate": 1.993004264889101e-05, "loss": 0.0422, "step": 851 }, { "epoch": 0.2, "learning_rate": 1.9929743076588978e-05, "loss": 0.1094, "step": 852 }, { "epoch": 0.2, "learning_rate": 1.992944286650148e-05, "loss": 0.0303, "step": 853 }, { "epoch": 0.2, "learning_rate": 1.992914201864779e-05, "loss": 0.0411, "step": 854 }, { "epoch": 0.2, "learning_rate": 1.992884053304724e-05, "loss": 0.0766, "step": 855 }, { "epoch": 0.2, "learning_rate": 1.9928538409719196e-05, "loss": 0.1046, "step": 856 }, { "epoch": 0.2, "learning_rate": 1.9928235648683055e-05, "loss": 0.2786, "step": 857 }, { "epoch": 0.2, "learning_rate": 1.9927932249958268e-05, "loss": 0.4974, "step": 858 }, { "epoch": 0.2, "learning_rate": 1.9927628213564327e-05, "loss": 0.419, "step": 859 }, { "epoch": 0.2, "learning_rate": 1.992732353952075e-05, "loss": 0.2349, "step": 860 }, { "epoch": 0.2, "learning_rate": 1.9927018227847116e-05, "loss": 0.2913, "step": 861 }, { "epoch": 0.2, "learning_rate": 1.9926712278563024e-05, "loss": 0.2668, "step": 862 }, { "epoch": 0.2, "learning_rate": 1.9926405691688138e-05, "loss": 0.3072, "step": 863 }, { "epoch": 0.2, "learning_rate": 1.9926098467242137e-05, "loss": 0.2096, "step": 864 }, { "epoch": 0.2, "learning_rate": 1.9925790605244764e-05, "loss": 0.2353, "step": 865 }, { "epoch": 0.2, "learning_rate": 1.9925482105715792e-05, "loss": 0.1542, "step": 866 }, { "epoch": 0.2, "learning_rate": 1.992517296867503e-05, "loss": 0.1608, "step": 867 }, { "epoch": 0.2, "learning_rate": 1.992486319414234e-05, "loss": 0.1272, "step": 868 }, { "epoch": 0.2, "learning_rate": 1.9924552782137614e-05, "loss": 0.0711, "step": 869 }, { "epoch": 0.2, "learning_rate": 1.9924241732680793e-05, "loss": 0.0843, "step": 870 }, { "epoch": 0.2, "learning_rate": 1.992393004579185e-05, "loss": 0.0596, "step": 871 }, { "epoch": 0.2, "learning_rate": 1.9923617721490813e-05, "loss": 0.1161, "step": 872 }, { "epoch": 0.2, "learning_rate": 1.992330475979774e-05, "loss": 0.1338, "step": 873 }, { "epoch": 0.21, "learning_rate": 1.9922991160732726e-05, "loss": 0.0911, "step": 874 }, { "epoch": 0.21, "learning_rate": 1.992267692431592e-05, "loss": 0.1056, "step": 875 }, { "epoch": 0.21, "learning_rate": 1.9922362050567505e-05, "loss": 0.0225, "step": 876 }, { "epoch": 0.21, "learning_rate": 1.99220465395077e-05, "loss": 0.1048, "step": 877 }, { "epoch": 0.21, "learning_rate": 1.992173039115678e-05, "loss": 0.0373, "step": 878 }, { "epoch": 0.21, "learning_rate": 1.9921413605535042e-05, "loss": 0.098, "step": 879 }, { "epoch": 0.21, "learning_rate": 1.9921096182662835e-05, "loss": 0.099, "step": 880 }, { "epoch": 0.21, "learning_rate": 1.9920778122560552e-05, "loss": 0.0862, "step": 881 }, { "epoch": 0.21, "learning_rate": 1.9920459425248617e-05, "loss": 0.0515, "step": 882 }, { "epoch": 0.21, "learning_rate": 1.9920140090747497e-05, "loss": 0.1163, "step": 883 }, { "epoch": 0.21, "learning_rate": 1.991982011907771e-05, "loss": 0.0616, "step": 884 }, { "epoch": 0.21, "learning_rate": 1.9919499510259805e-05, "loss": 0.0588, "step": 885 }, { "epoch": 0.21, "learning_rate": 1.9919178264314375e-05, "loss": 0.5321, "step": 886 }, { "epoch": 0.21, "learning_rate": 1.9918856381262052e-05, "loss": 0.2754, "step": 887 }, { "epoch": 0.21, "learning_rate": 1.991853386112351e-05, "loss": 0.536, "step": 888 }, { "epoch": 0.21, "learning_rate": 1.9918210703919466e-05, "loss": 0.0512, "step": 889 }, { "epoch": 0.21, "learning_rate": 1.9917886909670677e-05, "loss": 0.1053, "step": 890 }, { "epoch": 0.21, "learning_rate": 1.9917562478397937e-05, "loss": 0.0972, "step": 891 }, { "epoch": 0.21, "learning_rate": 1.991723741012209e-05, "loss": 0.0748, "step": 892 }, { "epoch": 0.21, "learning_rate": 1.991691170486401e-05, "loss": 0.1106, "step": 893 }, { "epoch": 0.21, "learning_rate": 1.991658536264462e-05, "loss": 0.0869, "step": 894 }, { "epoch": 0.21, "learning_rate": 1.9916258383484876e-05, "loss": 0.0422, "step": 895 }, { "epoch": 0.21, "learning_rate": 1.9915930767405788e-05, "loss": 0.1043, "step": 896 }, { "epoch": 0.21, "learning_rate": 1.991560251442839e-05, "loss": 0.0648, "step": 897 }, { "epoch": 0.21, "learning_rate": 1.9915273624573772e-05, "loss": 0.1086, "step": 898 }, { "epoch": 0.21, "learning_rate": 1.9914944097863054e-05, "loss": 0.1069, "step": 899 }, { "epoch": 0.21, "learning_rate": 1.9914613934317405e-05, "loss": 0.1742, "step": 900 }, { "epoch": 0.21, "learning_rate": 1.9914283133958027e-05, "loss": 0.1333, "step": 901 }, { "epoch": 0.21, "learning_rate": 1.9913951696806174e-05, "loss": 0.1846, "step": 902 }, { "epoch": 0.21, "learning_rate": 1.991361962288313e-05, "loss": 0.1893, "step": 903 }, { "epoch": 0.21, "learning_rate": 1.991328691221022e-05, "loss": 0.085, "step": 904 }, { "epoch": 0.21, "learning_rate": 1.9912953564808822e-05, "loss": 0.047, "step": 905 }, { "epoch": 0.21, "learning_rate": 1.9912619580700342e-05, "loss": 0.0697, "step": 906 }, { "epoch": 0.21, "learning_rate": 1.9912284959906236e-05, "loss": 0.2403, "step": 907 }, { "epoch": 0.21, "learning_rate": 1.991194970244799e-05, "loss": 0.1004, "step": 908 }, { "epoch": 0.21, "learning_rate": 1.991161380834714e-05, "loss": 0.2634, "step": 909 }, { "epoch": 0.21, "learning_rate": 1.9911277277625264e-05, "loss": 0.1849, "step": 910 }, { "epoch": 0.21, "learning_rate": 1.9910940110303973e-05, "loss": 0.1134, "step": 911 }, { "epoch": 0.21, "learning_rate": 1.9910602306404927e-05, "loss": 0.1443, "step": 912 }, { "epoch": 0.21, "learning_rate": 1.9910263865949816e-05, "loss": 0.0563, "step": 913 }, { "epoch": 0.21, "learning_rate": 1.9909924788960388e-05, "loss": 0.0859, "step": 914 }, { "epoch": 0.21, "learning_rate": 1.9909585075458417e-05, "loss": 0.0975, "step": 915 }, { "epoch": 0.21, "learning_rate": 1.990924472546572e-05, "loss": 0.1497, "step": 916 }, { "epoch": 0.22, "learning_rate": 1.990890373900416e-05, "loss": 0.1328, "step": 917 }, { "epoch": 0.22, "learning_rate": 1.9908562116095637e-05, "loss": 0.1752, "step": 918 }, { "epoch": 0.22, "learning_rate": 1.99082198567621e-05, "loss": 0.0648, "step": 919 }, { "epoch": 0.22, "learning_rate": 1.9907876961025524e-05, "loss": 0.1167, "step": 920 }, { "epoch": 0.22, "learning_rate": 1.9907533428907936e-05, "loss": 0.0296, "step": 921 }, { "epoch": 0.22, "learning_rate": 1.9907189260431404e-05, "loss": 0.0632, "step": 922 }, { "epoch": 0.22, "learning_rate": 1.9906844455618027e-05, "loss": 0.049, "step": 923 }, { "epoch": 0.22, "learning_rate": 1.990649901448996e-05, "loss": 0.0224, "step": 924 }, { "epoch": 0.22, "learning_rate": 1.9906152937069386e-05, "loss": 0.0923, "step": 925 }, { "epoch": 0.22, "learning_rate": 1.990580622337853e-05, "loss": 0.0734, "step": 926 }, { "epoch": 0.22, "learning_rate": 1.990545887343967e-05, "loss": 0.0943, "step": 927 }, { "epoch": 0.22, "learning_rate": 1.990511088727511e-05, "loss": 0.1433, "step": 928 }, { "epoch": 0.22, "learning_rate": 1.9904762264907202e-05, "loss": 0.0466, "step": 929 }, { "epoch": 0.22, "learning_rate": 1.990441300635834e-05, "loss": 0.1079, "step": 930 }, { "epoch": 0.22, "learning_rate": 1.9904063111650954e-05, "loss": 0.1136, "step": 931 }, { "epoch": 0.22, "learning_rate": 1.990371258080752e-05, "loss": 0.0803, "step": 932 }, { "epoch": 0.22, "learning_rate": 1.9903361413850554e-05, "loss": 0.0761, "step": 933 }, { "epoch": 0.22, "learning_rate": 1.9903009610802604e-05, "loss": 0.094, "step": 934 }, { "epoch": 0.22, "learning_rate": 1.9902657171686278e-05, "loss": 0.1331, "step": 935 }, { "epoch": 0.22, "learning_rate": 1.99023040965242e-05, "loss": 0.0336, "step": 936 }, { "epoch": 0.22, "learning_rate": 1.9901950385339056e-05, "loss": 0.0831, "step": 937 }, { "epoch": 0.22, "learning_rate": 1.9901596038153564e-05, "loss": 0.0609, "step": 938 }, { "epoch": 0.22, "learning_rate": 1.9901241054990484e-05, "loss": 0.0515, "step": 939 }, { "epoch": 0.22, "learning_rate": 1.9900885435872614e-05, "loss": 0.0591, "step": 940 }, { "epoch": 0.22, "learning_rate": 1.9900529180822797e-05, "loss": 0.0803, "step": 941 }, { "epoch": 0.22, "learning_rate": 1.9900172289863914e-05, "loss": 0.0262, "step": 942 }, { "epoch": 0.22, "learning_rate": 1.989981476301889e-05, "loss": 0.0229, "step": 943 }, { "epoch": 0.22, "learning_rate": 1.9899456600310686e-05, "loss": 0.0502, "step": 944 }, { "epoch": 0.22, "learning_rate": 1.989909780176231e-05, "loss": 0.0594, "step": 945 }, { "epoch": 0.22, "learning_rate": 1.9898738367396808e-05, "loss": 0.0394, "step": 946 }, { "epoch": 0.22, "learning_rate": 1.9898378297237264e-05, "loss": 0.0911, "step": 947 }, { "epoch": 0.22, "learning_rate": 1.98980175913068e-05, "loss": 0.0249, "step": 948 }, { "epoch": 0.22, "learning_rate": 1.9897656249628596e-05, "loss": 0.1427, "step": 949 }, { "epoch": 0.22, "learning_rate": 1.9897294272225852e-05, "loss": 0.1719, "step": 950 }, { "epoch": 0.22, "learning_rate": 1.9896931659121823e-05, "loss": 0.2845, "step": 951 }, { "epoch": 0.22, "learning_rate": 1.9896568410339793e-05, "loss": 0.2415, "step": 952 }, { "epoch": 0.22, "learning_rate": 1.98962045259031e-05, "loss": 0.1373, "step": 953 }, { "epoch": 0.22, "learning_rate": 1.9895840005835115e-05, "loss": 0.0638, "step": 954 }, { "epoch": 0.22, "learning_rate": 1.9895474850159246e-05, "loss": 0.1436, "step": 955 }, { "epoch": 0.22, "learning_rate": 1.989510905889895e-05, "loss": 0.1311, "step": 956 }, { "epoch": 0.22, "learning_rate": 1.9894742632077726e-05, "loss": 0.1114, "step": 957 }, { "epoch": 0.22, "learning_rate": 1.9894375569719102e-05, "loss": 0.151, "step": 958 }, { "epoch": 0.23, "learning_rate": 1.989400787184666e-05, "loss": 0.0627, "step": 959 }, { "epoch": 0.23, "learning_rate": 1.9893639538484017e-05, "loss": 0.0535, "step": 960 }, { "epoch": 0.23, "learning_rate": 1.9893270569654827e-05, "loss": 0.0922, "step": 961 }, { "epoch": 0.23, "learning_rate": 1.9892900965382793e-05, "loss": 0.0534, "step": 962 }, { "epoch": 0.23, "learning_rate": 1.989253072569165e-05, "loss": 0.1053, "step": 963 }, { "epoch": 0.23, "learning_rate": 1.989215985060518e-05, "loss": 0.0636, "step": 964 }, { "epoch": 0.23, "learning_rate": 1.989178834014721e-05, "loss": 0.0827, "step": 965 }, { "epoch": 0.23, "learning_rate": 1.9891416194341594e-05, "loss": 0.0707, "step": 966 }, { "epoch": 0.23, "learning_rate": 1.989104341321224e-05, "loss": 0.0976, "step": 967 }, { "epoch": 0.23, "learning_rate": 1.989066999678309e-05, "loss": 0.0422, "step": 968 }, { "epoch": 0.23, "learning_rate": 1.9890295945078127e-05, "loss": 0.0939, "step": 969 }, { "epoch": 0.23, "learning_rate": 1.9889921258121376e-05, "loss": 0.0715, "step": 970 }, { "epoch": 0.23, "learning_rate": 1.988954593593691e-05, "loss": 0.1016, "step": 971 }, { "epoch": 0.23, "learning_rate": 1.9889169978548827e-05, "loss": 0.2378, "step": 972 }, { "epoch": 0.23, "learning_rate": 1.9888793385981275e-05, "loss": 0.0768, "step": 973 }, { "epoch": 0.23, "learning_rate": 1.988841615825845e-05, "loss": 0.0978, "step": 974 }, { "epoch": 0.23, "learning_rate": 1.9888038295404577e-05, "loss": 0.0636, "step": 975 }, { "epoch": 0.23, "learning_rate": 1.9887659797443922e-05, "loss": 0.0359, "step": 976 }, { "epoch": 0.23, "learning_rate": 1.9887280664400803e-05, "loss": 0.0744, "step": 977 }, { "epoch": 0.23, "learning_rate": 1.988690089629957e-05, "loss": 0.1213, "step": 978 }, { "epoch": 0.23, "learning_rate": 1.9886520493164613e-05, "loss": 0.1025, "step": 979 }, { "epoch": 0.23, "learning_rate": 1.9886139455020362e-05, "loss": 0.0746, "step": 980 }, { "epoch": 0.23, "learning_rate": 1.98857577818913e-05, "loss": 0.0366, "step": 981 }, { "epoch": 0.23, "learning_rate": 1.9885375473801936e-05, "loss": 0.1141, "step": 982 }, { "epoch": 0.23, "learning_rate": 1.9884992530776827e-05, "loss": 0.1163, "step": 983 }, { "epoch": 0.23, "learning_rate": 1.9884608952840567e-05, "loss": 0.0691, "step": 984 }, { "epoch": 0.23, "learning_rate": 1.9884224740017796e-05, "loss": 0.1109, "step": 985 }, { "epoch": 0.23, "learning_rate": 1.988383989233319e-05, "loss": 0.1229, "step": 986 }, { "epoch": 0.23, "learning_rate": 1.9883454409811468e-05, "loss": 0.0703, "step": 987 }, { "epoch": 0.23, "learning_rate": 1.9883068292477392e-05, "loss": 0.0378, "step": 988 }, { "epoch": 0.23, "learning_rate": 1.9882681540355758e-05, "loss": 0.0895, "step": 989 }, { "epoch": 0.23, "learning_rate": 1.9882294153471414e-05, "loss": 0.0605, "step": 990 }, { "epoch": 0.23, "learning_rate": 1.9881906131849233e-05, "loss": 0.0579, "step": 991 }, { "epoch": 0.23, "learning_rate": 1.988151747551414e-05, "loss": 0.046, "step": 992 }, { "epoch": 0.23, "learning_rate": 1.9881128184491106e-05, "loss": 0.1067, "step": 993 }, { "epoch": 0.23, "learning_rate": 1.9880738258805125e-05, "loss": 0.0681, "step": 994 }, { "epoch": 0.23, "learning_rate": 1.9880347698481246e-05, "loss": 0.0354, "step": 995 }, { "epoch": 0.23, "learning_rate": 1.9879956503544555e-05, "loss": 0.0682, "step": 996 }, { "epoch": 0.23, "learning_rate": 1.9879564674020176e-05, "loss": 0.1228, "step": 997 }, { "epoch": 0.23, "learning_rate": 1.9879172209933282e-05, "loss": 0.0384, "step": 998 }, { "epoch": 0.23, "learning_rate": 1.9878779111309073e-05, "loss": 0.1597, "step": 999 }, { "epoch": 0.23, "learning_rate": 1.9878385378172806e-05, "loss": 0.1065, "step": 1000 }, { "epoch": 0.23, "learning_rate": 1.9877991010549765e-05, "loss": 0.0479, "step": 1001 }, { "epoch": 0.24, "learning_rate": 1.9877596008465278e-05, "loss": 0.1045, "step": 1002 }, { "epoch": 0.24, "learning_rate": 1.987720037194472e-05, "loss": 0.1469, "step": 1003 }, { "epoch": 0.24, "learning_rate": 1.9876804101013504e-05, "loss": 0.1293, "step": 1004 }, { "epoch": 0.24, "learning_rate": 1.9876407195697077e-05, "loss": 0.1881, "step": 1005 }, { "epoch": 0.24, "learning_rate": 1.9876009656020938e-05, "loss": 0.0931, "step": 1006 }, { "epoch": 0.24, "learning_rate": 1.9875611482010617e-05, "loss": 0.1576, "step": 1007 }, { "epoch": 0.24, "learning_rate": 1.9875212673691692e-05, "loss": 0.0916, "step": 1008 }, { "epoch": 0.24, "learning_rate": 1.9874813231089772e-05, "loss": 0.0752, "step": 1009 }, { "epoch": 0.24, "learning_rate": 1.987441315423052e-05, "loss": 0.0837, "step": 1010 }, { "epoch": 0.24, "learning_rate": 1.987401244313963e-05, "loss": 0.1398, "step": 1011 }, { "epoch": 0.24, "learning_rate": 1.987361109784284e-05, "loss": 0.0544, "step": 1012 }, { "epoch": 0.24, "learning_rate": 1.9873209118365925e-05, "loss": 0.0798, "step": 1013 }, { "epoch": 0.24, "learning_rate": 1.987280650473471e-05, "loss": 0.1038, "step": 1014 }, { "epoch": 0.24, "learning_rate": 1.987240325697505e-05, "loss": 0.0501, "step": 1015 }, { "epoch": 0.24, "learning_rate": 1.987199937511285e-05, "loss": 0.1101, "step": 1016 }, { "epoch": 0.24, "learning_rate": 1.9871594859174048e-05, "loss": 0.0426, "step": 1017 }, { "epoch": 0.24, "learning_rate": 1.9871189709184627e-05, "loss": 0.1339, "step": 1018 }, { "epoch": 0.24, "learning_rate": 1.9870783925170608e-05, "loss": 0.0938, "step": 1019 }, { "epoch": 0.24, "learning_rate": 1.9870377507158056e-05, "loss": 0.1302, "step": 1020 }, { "epoch": 0.24, "learning_rate": 1.9869970455173076e-05, "loss": 0.0834, "step": 1021 }, { "epoch": 0.24, "learning_rate": 1.9869562769241814e-05, "loss": 0.0965, "step": 1022 }, { "epoch": 0.24, "learning_rate": 1.9869154449390447e-05, "loss": 0.1226, "step": 1023 }, { "epoch": 0.24, "learning_rate": 1.9868745495645213e-05, "loss": 0.0745, "step": 1024 }, { "epoch": 0.24, "learning_rate": 1.9868335908032374e-05, "loss": 0.1374, "step": 1025 }, { "epoch": 0.24, "learning_rate": 1.9867925686578234e-05, "loss": 0.0877, "step": 1026 }, { "epoch": 0.24, "learning_rate": 1.986751483130915e-05, "loss": 0.1363, "step": 1027 }, { "epoch": 0.24, "learning_rate": 1.9867103342251502e-05, "loss": 0.0577, "step": 1028 }, { "epoch": 0.24, "learning_rate": 1.9866691219431725e-05, "loss": 0.0836, "step": 1029 }, { "epoch": 0.24, "learning_rate": 1.986627846287629e-05, "loss": 0.0467, "step": 1030 }, { "epoch": 0.24, "learning_rate": 1.9865865072611703e-05, "loss": 0.0609, "step": 1031 }, { "epoch": 0.24, "learning_rate": 1.986545104866452e-05, "loss": 0.0854, "step": 1032 }, { "epoch": 0.24, "learning_rate": 1.9865036391061338e-05, "loss": 0.1131, "step": 1033 }, { "epoch": 0.24, "learning_rate": 1.9864621099828783e-05, "loss": 0.0724, "step": 1034 }, { "epoch": 0.24, "learning_rate": 1.9864205174993533e-05, "loss": 0.0912, "step": 1035 }, { "epoch": 0.24, "learning_rate": 1.98637886165823e-05, "loss": 0.148, "step": 1036 }, { "epoch": 0.24, "learning_rate": 1.9863371424621842e-05, "loss": 0.052, "step": 1037 }, { "epoch": 0.24, "learning_rate": 1.9862953599138954e-05, "loss": 0.1028, "step": 1038 }, { "epoch": 0.24, "learning_rate": 1.9862535140160474e-05, "loss": 0.0842, "step": 1039 }, { "epoch": 0.24, "learning_rate": 1.986211604771328e-05, "loss": 0.0398, "step": 1040 }, { "epoch": 0.24, "learning_rate": 1.9861696321824285e-05, "loss": 0.061, "step": 1041 }, { "epoch": 0.24, "learning_rate": 1.9861275962520456e-05, "loss": 0.1711, "step": 1042 }, { "epoch": 0.24, "learning_rate": 1.9860854969828783e-05, "loss": 0.1499, "step": 1043 }, { "epoch": 0.25, "learning_rate": 1.9860433343776315e-05, "loss": 0.0582, "step": 1044 }, { "epoch": 0.25, "learning_rate": 1.9860011084390133e-05, "loss": 0.0846, "step": 1045 }, { "epoch": 0.25, "learning_rate": 1.9859588191697348e-05, "loss": 0.096, "step": 1046 }, { "epoch": 0.25, "learning_rate": 1.9859164665725138e-05, "loss": 0.042, "step": 1047 }, { "epoch": 0.25, "learning_rate": 1.985874050650069e-05, "loss": 0.0657, "step": 1048 }, { "epoch": 0.25, "learning_rate": 1.9858315714051262e-05, "loss": 0.0716, "step": 1049 }, { "epoch": 0.25, "learning_rate": 1.9857890288404128e-05, "loss": 0.1096, "step": 1050 }, { "epoch": 0.25, "learning_rate": 1.9857464229586612e-05, "loss": 0.0581, "step": 1051 }, { "epoch": 0.25, "learning_rate": 1.9857037537626094e-05, "loss": 0.0921, "step": 1052 }, { "epoch": 0.25, "learning_rate": 1.9856610212549965e-05, "loss": 0.1163, "step": 1053 }, { "epoch": 0.25, "learning_rate": 1.9856182254385677e-05, "loss": 0.0853, "step": 1054 }, { "epoch": 0.25, "learning_rate": 1.985575366316072e-05, "loss": 0.0875, "step": 1055 }, { "epoch": 0.25, "learning_rate": 1.9855324438902622e-05, "loss": 0.1248, "step": 1056 }, { "epoch": 0.25, "learning_rate": 1.9854894581638947e-05, "loss": 0.0253, "step": 1057 }, { "epoch": 0.25, "learning_rate": 1.985446409139731e-05, "loss": 0.1084, "step": 1058 }, { "epoch": 0.25, "learning_rate": 1.985403296820536e-05, "loss": 0.0875, "step": 1059 }, { "epoch": 0.25, "learning_rate": 1.985360121209079e-05, "loss": 0.054, "step": 1060 }, { "epoch": 0.25, "learning_rate": 1.9853168823081326e-05, "loss": 0.0628, "step": 1061 }, { "epoch": 0.25, "learning_rate": 1.9852735801204744e-05, "loss": 0.0687, "step": 1062 }, { "epoch": 0.25, "learning_rate": 1.9852302146488854e-05, "loss": 0.0974, "step": 1063 }, { "epoch": 0.25, "learning_rate": 1.9851867858961514e-05, "loss": 0.0686, "step": 1064 }, { "epoch": 0.25, "learning_rate": 1.9851432938650618e-05, "loss": 0.1368, "step": 1065 }, { "epoch": 0.25, "learning_rate": 1.9850997385584097e-05, "loss": 0.1346, "step": 1066 }, { "epoch": 0.25, "learning_rate": 1.9850561199789928e-05, "loss": 0.1579, "step": 1067 }, { "epoch": 0.25, "learning_rate": 1.985012438129613e-05, "loss": 0.0996, "step": 1068 }, { "epoch": 0.25, "learning_rate": 1.9849686930130754e-05, "loss": 0.0852, "step": 1069 }, { "epoch": 0.25, "learning_rate": 1.98492488463219e-05, "loss": 0.0807, "step": 1070 }, { "epoch": 0.25, "learning_rate": 1.9848810129897713e-05, "loss": 0.0916, "step": 1071 }, { "epoch": 0.25, "learning_rate": 1.984837078088636e-05, "loss": 0.0649, "step": 1072 }, { "epoch": 0.25, "learning_rate": 1.984793079931607e-05, "loss": 0.0527, "step": 1073 }, { "epoch": 0.25, "learning_rate": 1.9847490185215093e-05, "loss": 0.087, "step": 1074 }, { "epoch": 0.25, "learning_rate": 1.984704893861174e-05, "loss": 0.1393, "step": 1075 }, { "epoch": 0.25, "learning_rate": 1.9846607059534344e-05, "loss": 0.2714, "step": 1076 }, { "epoch": 0.25, "learning_rate": 1.9846164548011294e-05, "loss": 0.1821, "step": 1077 }, { "epoch": 0.25, "learning_rate": 1.9845721404071002e-05, "loss": 0.2203, "step": 1078 }, { "epoch": 0.25, "learning_rate": 1.9845277627741946e-05, "loss": 0.3285, "step": 1079 }, { "epoch": 0.25, "learning_rate": 1.9844833219052617e-05, "loss": 0.4112, "step": 1080 }, { "epoch": 0.25, "learning_rate": 1.9844388178031563e-05, "loss": 0.2797, "step": 1081 }, { "epoch": 0.25, "learning_rate": 1.9843942504707373e-05, "loss": 0.2162, "step": 1082 }, { "epoch": 0.25, "learning_rate": 1.9843496199108663e-05, "loss": 0.2265, "step": 1083 }, { "epoch": 0.25, "learning_rate": 1.984304926126411e-05, "loss": 0.2527, "step": 1084 }, { "epoch": 0.25, "learning_rate": 1.9842601691202414e-05, "loss": 0.1171, "step": 1085 }, { "epoch": 0.25, "learning_rate": 1.9842153488952326e-05, "loss": 0.0707, "step": 1086 }, { "epoch": 0.26, "learning_rate": 1.9841704654542634e-05, "loss": 0.1102, "step": 1087 }, { "epoch": 0.26, "learning_rate": 1.984125518800216e-05, "loss": 0.0665, "step": 1088 }, { "epoch": 0.26, "learning_rate": 1.984080508935978e-05, "loss": 0.147, "step": 1089 }, { "epoch": 0.26, "learning_rate": 1.9840354358644404e-05, "loss": 0.0241, "step": 1090 }, { "epoch": 0.26, "learning_rate": 1.9839902995884976e-05, "loss": 0.0687, "step": 1091 }, { "epoch": 0.26, "learning_rate": 1.9839451001110492e-05, "loss": 0.0887, "step": 1092 }, { "epoch": 0.26, "learning_rate": 1.9838998374349984e-05, "loss": 0.1619, "step": 1093 }, { "epoch": 0.26, "learning_rate": 1.983854511563252e-05, "loss": 0.0487, "step": 1094 }, { "epoch": 0.26, "learning_rate": 1.9838091224987215e-05, "loss": 0.0691, "step": 1095 }, { "epoch": 0.26, "learning_rate": 1.9837636702443227e-05, "loss": 0.0719, "step": 1096 }, { "epoch": 0.26, "learning_rate": 1.9837181548029743e-05, "loss": 0.0811, "step": 1097 }, { "epoch": 0.26, "learning_rate": 1.9836725761776e-05, "loss": 0.0509, "step": 1098 }, { "epoch": 0.26, "learning_rate": 1.9836269343711273e-05, "loss": 0.1188, "step": 1099 }, { "epoch": 0.26, "learning_rate": 1.9835812293864878e-05, "loss": 0.1327, "step": 1100 }, { "epoch": 0.26, "learning_rate": 1.983535461226617e-05, "loss": 0.1602, "step": 1101 }, { "epoch": 0.26, "learning_rate": 1.9834896298944545e-05, "loss": 0.1288, "step": 1102 }, { "epoch": 0.26, "learning_rate": 1.9834437353929446e-05, "loss": 0.1026, "step": 1103 }, { "epoch": 0.26, "learning_rate": 1.9833977777250345e-05, "loss": 0.0955, "step": 1104 }, { "epoch": 0.26, "learning_rate": 1.9833517568936763e-05, "loss": 0.0835, "step": 1105 }, { "epoch": 0.26, "learning_rate": 1.983305672901826e-05, "loss": 0.0694, "step": 1106 }, { "epoch": 0.26, "learning_rate": 1.9832595257524433e-05, "loss": 0.0522, "step": 1107 }, { "epoch": 0.26, "learning_rate": 1.983213315448492e-05, "loss": 0.0239, "step": 1108 }, { "epoch": 0.26, "learning_rate": 1.983167041992941e-05, "loss": 0.1235, "step": 1109 }, { "epoch": 0.26, "learning_rate": 1.983120705388762e-05, "loss": 0.1012, "step": 1110 }, { "epoch": 0.26, "learning_rate": 1.983074305638931e-05, "loss": 0.0403, "step": 1111 }, { "epoch": 0.26, "learning_rate": 1.9830278427464286e-05, "loss": 0.086, "step": 1112 }, { "epoch": 0.26, "learning_rate": 1.9829813167142386e-05, "loss": 0.0835, "step": 1113 }, { "epoch": 0.26, "learning_rate": 1.9829347275453497e-05, "loss": 0.0822, "step": 1114 }, { "epoch": 0.26, "learning_rate": 1.9828880752427544e-05, "loss": 0.0762, "step": 1115 }, { "epoch": 0.26, "learning_rate": 1.982841359809449e-05, "loss": 0.1098, "step": 1116 }, { "epoch": 0.26, "learning_rate": 1.982794581248434e-05, "loss": 0.0601, "step": 1117 }, { "epoch": 0.26, "learning_rate": 1.982747739562714e-05, "loss": 0.1032, "step": 1118 }, { "epoch": 0.26, "learning_rate": 1.982700834755298e-05, "loss": 0.0926, "step": 1119 }, { "epoch": 0.26, "learning_rate": 1.9826538668291985e-05, "loss": 0.0379, "step": 1120 }, { "epoch": 0.26, "learning_rate": 1.9826068357874316e-05, "loss": 0.0266, "step": 1121 }, { "epoch": 0.26, "learning_rate": 1.982559741633019e-05, "loss": 0.0586, "step": 1122 }, { "epoch": 0.26, "learning_rate": 1.9825125843689846e-05, "loss": 0.0265, "step": 1123 }, { "epoch": 0.26, "learning_rate": 1.982465363998358e-05, "loss": 0.0587, "step": 1124 }, { "epoch": 0.26, "learning_rate": 1.9824180805241725e-05, "loss": 0.0788, "step": 1125 }, { "epoch": 0.26, "learning_rate": 1.9823707339494644e-05, "loss": 0.1474, "step": 1126 }, { "epoch": 0.26, "learning_rate": 1.9823233242772752e-05, "loss": 0.0691, "step": 1127 }, { "epoch": 0.26, "learning_rate": 1.9822758515106493e-05, "loss": 0.0424, "step": 1128 }, { "epoch": 0.26, "learning_rate": 1.9822283156526364e-05, "loss": 0.0663, "step": 1129 }, { "epoch": 0.27, "learning_rate": 1.98218071670629e-05, "loss": 0.1042, "step": 1130 }, { "epoch": 0.27, "learning_rate": 1.982133054674667e-05, "loss": 0.0486, "step": 1131 }, { "epoch": 0.27, "learning_rate": 1.9820853295608285e-05, "loss": 0.1087, "step": 1132 }, { "epoch": 0.27, "learning_rate": 1.9820375413678402e-05, "loss": 0.075, "step": 1133 }, { "epoch": 0.27, "learning_rate": 1.9819896900987715e-05, "loss": 0.0674, "step": 1134 }, { "epoch": 0.27, "learning_rate": 1.9819417757566958e-05, "loss": 0.0639, "step": 1135 }, { "epoch": 0.27, "learning_rate": 1.981893798344691e-05, "loss": 0.1375, "step": 1136 }, { "epoch": 0.27, "learning_rate": 1.981845757865838e-05, "loss": 0.0585, "step": 1137 }, { "epoch": 0.27, "learning_rate": 1.981797654323223e-05, "loss": 0.0954, "step": 1138 }, { "epoch": 0.27, "learning_rate": 1.9817494877199356e-05, "loss": 0.0607, "step": 1139 }, { "epoch": 0.27, "learning_rate": 1.981701258059069e-05, "loss": 0.09, "step": 1140 }, { "epoch": 0.27, "learning_rate": 1.981652965343722e-05, "loss": 0.083, "step": 1141 }, { "epoch": 0.27, "learning_rate": 1.9816046095769953e-05, "loss": 0.1925, "step": 1142 }, { "epoch": 0.27, "learning_rate": 1.9815561907619954e-05, "loss": 0.043, "step": 1143 }, { "epoch": 0.27, "learning_rate": 1.981507708901832e-05, "loss": 0.0502, "step": 1144 }, { "epoch": 0.27, "learning_rate": 1.9814591639996194e-05, "loss": 0.1118, "step": 1145 }, { "epoch": 0.27, "learning_rate": 1.981410556058476e-05, "loss": 0.0658, "step": 1146 }, { "epoch": 0.27, "learning_rate": 1.9813618850815223e-05, "loss": 0.0688, "step": 1147 }, { "epoch": 0.27, "learning_rate": 1.9813131510718858e-05, "loss": 0.0364, "step": 1148 }, { "epoch": 0.27, "learning_rate": 1.9812643540326967e-05, "loss": 0.1086, "step": 1149 }, { "epoch": 0.27, "learning_rate": 1.9812154939670885e-05, "loss": 0.0679, "step": 1150 }, { "epoch": 0.27, "learning_rate": 1.9811665708781998e-05, "loss": 0.1664, "step": 1151 }, { "epoch": 0.27, "learning_rate": 1.981117584769173e-05, "loss": 0.1089, "step": 1152 }, { "epoch": 0.27, "learning_rate": 1.9810685356431544e-05, "loss": 0.1084, "step": 1153 }, { "epoch": 0.27, "learning_rate": 1.9810194235032943e-05, "loss": 0.0689, "step": 1154 }, { "epoch": 0.27, "learning_rate": 1.9809702483527474e-05, "loss": 0.1117, "step": 1155 }, { "epoch": 0.27, "learning_rate": 1.9809210101946725e-05, "loss": 0.0794, "step": 1156 }, { "epoch": 0.27, "learning_rate": 1.980871709032231e-05, "loss": 0.0478, "step": 1157 }, { "epoch": 0.27, "learning_rate": 1.9808223448685906e-05, "loss": 0.0417, "step": 1158 }, { "epoch": 0.27, "learning_rate": 1.9807729177069218e-05, "loss": 0.1192, "step": 1159 }, { "epoch": 0.27, "learning_rate": 1.980723427550399e-05, "loss": 0.1115, "step": 1160 }, { "epoch": 0.27, "learning_rate": 1.980673874402201e-05, "loss": 0.0394, "step": 1161 }, { "epoch": 0.27, "learning_rate": 1.9806242582655108e-05, "loss": 0.1093, "step": 1162 }, { "epoch": 0.27, "learning_rate": 1.9805745791435143e-05, "loss": 0.0596, "step": 1163 }, { "epoch": 0.27, "learning_rate": 1.9805248370394043e-05, "loss": 0.1031, "step": 1164 }, { "epoch": 0.27, "learning_rate": 1.980475031956374e-05, "loss": 0.0729, "step": 1165 }, { "epoch": 0.27, "learning_rate": 1.9804251638976226e-05, "loss": 0.1174, "step": 1166 }, { "epoch": 0.27, "learning_rate": 1.980375232866354e-05, "loss": 0.0868, "step": 1167 }, { "epoch": 0.27, "learning_rate": 1.9803252388657747e-05, "loss": 0.0834, "step": 1168 }, { "epoch": 0.27, "learning_rate": 1.9802751818990955e-05, "loss": 0.1208, "step": 1169 }, { "epoch": 0.27, "learning_rate": 1.9802250619695323e-05, "loss": 0.1401, "step": 1170 }, { "epoch": 0.27, "learning_rate": 1.9801748790803036e-05, "loss": 0.0787, "step": 1171 }, { "epoch": 0.28, "learning_rate": 1.9801246332346328e-05, "loss": 0.0596, "step": 1172 }, { "epoch": 0.28, "learning_rate": 1.9800743244357475e-05, "loss": 0.0998, "step": 1173 }, { "epoch": 0.28, "learning_rate": 1.980023952686879e-05, "loss": 0.2761, "step": 1174 }, { "epoch": 0.28, "learning_rate": 1.979973517991262e-05, "loss": 0.1355, "step": 1175 }, { "epoch": 0.28, "learning_rate": 1.9799230203521367e-05, "loss": 0.0568, "step": 1176 }, { "epoch": 0.28, "learning_rate": 1.979872459772746e-05, "loss": 0.1361, "step": 1177 }, { "epoch": 0.28, "learning_rate": 1.979821836256338e-05, "loss": 0.0381, "step": 1178 }, { "epoch": 0.28, "learning_rate": 1.9797711498061636e-05, "loss": 0.1071, "step": 1179 }, { "epoch": 0.28, "learning_rate": 1.9797204004254787e-05, "loss": 0.0399, "step": 1180 }, { "epoch": 0.28, "learning_rate": 1.9796695881175432e-05, "loss": 0.1948, "step": 1181 }, { "epoch": 0.28, "learning_rate": 1.97961871288562e-05, "loss": 0.0822, "step": 1182 }, { "epoch": 0.28, "learning_rate": 1.9795677747329778e-05, "loss": 0.0768, "step": 1183 }, { "epoch": 0.28, "learning_rate": 1.9795167736628873e-05, "loss": 0.1236, "step": 1184 }, { "epoch": 0.28, "learning_rate": 1.9794657096786248e-05, "loss": 0.0304, "step": 1185 }, { "epoch": 0.28, "learning_rate": 1.9794145827834702e-05, "loss": 0.1068, "step": 1186 }, { "epoch": 0.28, "learning_rate": 1.9793633929807077e-05, "loss": 0.0457, "step": 1187 }, { "epoch": 0.28, "learning_rate": 1.9793121402736244e-05, "loss": 0.1432, "step": 1188 }, { "epoch": 0.28, "learning_rate": 1.9792608246655126e-05, "loss": 0.0728, "step": 1189 }, { "epoch": 0.28, "learning_rate": 1.9792094461596686e-05, "loss": 0.1895, "step": 1190 }, { "epoch": 0.28, "learning_rate": 1.979158004759392e-05, "loss": 0.12, "step": 1191 }, { "epoch": 0.28, "learning_rate": 1.9791065004679873e-05, "loss": 0.0186, "step": 1192 }, { "epoch": 0.28, "learning_rate": 1.979054933288762e-05, "loss": 0.0484, "step": 1193 }, { "epoch": 0.28, "learning_rate": 1.9790033032250287e-05, "loss": 0.1068, "step": 1194 }, { "epoch": 0.28, "learning_rate": 1.978951610280104e-05, "loss": 0.1201, "step": 1195 }, { "epoch": 0.28, "learning_rate": 1.978899854457307e-05, "loss": 0.0796, "step": 1196 }, { "epoch": 0.28, "learning_rate": 1.978848035759963e-05, "loss": 0.1029, "step": 1197 }, { "epoch": 0.28, "learning_rate": 1.9787961541913995e-05, "loss": 0.0401, "step": 1198 }, { "epoch": 0.28, "learning_rate": 1.9787442097549492e-05, "loss": 0.0628, "step": 1199 }, { "epoch": 0.28, "learning_rate": 1.978692202453949e-05, "loss": 0.0709, "step": 1200 }, { "epoch": 0.28, "learning_rate": 1.978640132291739e-05, "loss": 0.1033, "step": 1201 }, { "epoch": 0.28, "learning_rate": 1.978587999271663e-05, "loss": 0.0804, "step": 1202 }, { "epoch": 0.28, "learning_rate": 1.9785358033970697e-05, "loss": 0.0396, "step": 1203 }, { "epoch": 0.28, "learning_rate": 1.9784835446713128e-05, "loss": 0.0998, "step": 1204 }, { "epoch": 0.28, "learning_rate": 1.978431223097747e-05, "loss": 0.0676, "step": 1205 }, { "epoch": 0.28, "learning_rate": 1.978378838679735e-05, "loss": 0.0918, "step": 1206 }, { "epoch": 0.28, "learning_rate": 1.9783263914206397e-05, "loss": 0.1134, "step": 1207 }, { "epoch": 0.28, "learning_rate": 1.97827388132383e-05, "loss": 0.171, "step": 1208 }, { "epoch": 0.28, "learning_rate": 1.9782213083926797e-05, "loss": 0.1468, "step": 1209 }, { "epoch": 0.28, "learning_rate": 1.9781686726305645e-05, "loss": 0.2773, "step": 1210 }, { "epoch": 0.28, "learning_rate": 1.978115974040866e-05, "loss": 0.2938, "step": 1211 }, { "epoch": 0.28, "learning_rate": 1.9780632126269684e-05, "loss": 0.2764, "step": 1212 }, { "epoch": 0.28, "learning_rate": 1.9780103883922606e-05, "loss": 0.1977, "step": 1213 }, { "epoch": 0.28, "learning_rate": 1.9779575013401357e-05, "loss": 0.1423, "step": 1214 }, { "epoch": 0.29, "learning_rate": 1.9779045514739904e-05, "loss": 0.1389, "step": 1215 }, { "epoch": 0.29, "learning_rate": 1.977851538797226e-05, "loss": 0.0888, "step": 1216 }, { "epoch": 0.29, "learning_rate": 1.977798463313247e-05, "loss": 0.0438, "step": 1217 }, { "epoch": 0.29, "learning_rate": 1.9777453250254634e-05, "loss": 0.0648, "step": 1218 }, { "epoch": 0.29, "learning_rate": 1.977692123937287e-05, "loss": 0.0391, "step": 1219 }, { "epoch": 0.29, "learning_rate": 1.977638860052136e-05, "loss": 0.071, "step": 1220 }, { "epoch": 0.29, "learning_rate": 1.9775855333734303e-05, "loss": 0.0509, "step": 1221 }, { "epoch": 0.29, "learning_rate": 1.977532143904596e-05, "loss": 0.0972, "step": 1222 }, { "epoch": 0.29, "learning_rate": 1.9774786916490622e-05, "loss": 0.0501, "step": 1223 }, { "epoch": 0.29, "learning_rate": 1.9774251766102623e-05, "loss": 0.0512, "step": 1224 }, { "epoch": 0.29, "learning_rate": 1.977371598791633e-05, "loss": 0.0669, "step": 1225 }, { "epoch": 0.29, "learning_rate": 1.9773179581966158e-05, "loss": 0.0681, "step": 1226 }, { "epoch": 0.29, "learning_rate": 1.977264254828656e-05, "loss": 0.0432, "step": 1227 }, { "epoch": 0.29, "learning_rate": 1.9772104886912032e-05, "loss": 0.0357, "step": 1228 }, { "epoch": 0.29, "learning_rate": 1.9771566597877107e-05, "loss": 0.0549, "step": 1229 }, { "epoch": 0.29, "learning_rate": 1.9771027681216356e-05, "loss": 0.0723, "step": 1230 }, { "epoch": 0.29, "learning_rate": 1.97704881369644e-05, "loss": 0.1085, "step": 1231 }, { "epoch": 0.29, "learning_rate": 1.9769947965155887e-05, "loss": 0.06, "step": 1232 }, { "epoch": 0.29, "learning_rate": 1.9769407165825517e-05, "loss": 0.0728, "step": 1233 }, { "epoch": 0.29, "learning_rate": 1.976886573900802e-05, "loss": 0.0329, "step": 1234 }, { "epoch": 0.29, "learning_rate": 1.9768323684738177e-05, "loss": 0.0922, "step": 1235 }, { "epoch": 0.29, "learning_rate": 1.9767781003050805e-05, "loss": 0.1406, "step": 1236 }, { "epoch": 0.29, "learning_rate": 1.9767237693980752e-05, "loss": 0.112, "step": 1237 }, { "epoch": 0.29, "learning_rate": 1.9766693757562923e-05, "loss": 0.1095, "step": 1238 }, { "epoch": 0.29, "learning_rate": 1.9766149193832253e-05, "loss": 0.0157, "step": 1239 }, { "epoch": 0.29, "learning_rate": 1.9765604002823717e-05, "loss": 0.1012, "step": 1240 }, { "epoch": 0.29, "learning_rate": 1.976505818457233e-05, "loss": 0.0393, "step": 1241 }, { "epoch": 0.29, "learning_rate": 1.976451173911316e-05, "loss": 0.1, "step": 1242 }, { "epoch": 0.29, "learning_rate": 1.9763964666481295e-05, "loss": 0.0954, "step": 1243 }, { "epoch": 0.29, "learning_rate": 1.976341696671188e-05, "loss": 0.0644, "step": 1244 }, { "epoch": 0.29, "learning_rate": 1.976286863984009e-05, "loss": 0.104, "step": 1245 }, { "epoch": 0.29, "learning_rate": 1.9762319685901142e-05, "loss": 0.1438, "step": 1246 }, { "epoch": 0.29, "learning_rate": 1.97617701049303e-05, "loss": 0.1268, "step": 1247 }, { "epoch": 0.29, "learning_rate": 1.976121989696286e-05, "loss": 0.1255, "step": 1248 }, { "epoch": 0.29, "learning_rate": 1.9760669062034163e-05, "loss": 0.1222, "step": 1249 }, { "epoch": 0.29, "learning_rate": 1.976011760017959e-05, "loss": 0.0744, "step": 1250 }, { "epoch": 0.29, "learning_rate": 1.975956551143456e-05, "loss": 0.0791, "step": 1251 }, { "epoch": 0.29, "learning_rate": 1.9759012795834537e-05, "loss": 0.0927, "step": 1252 }, { "epoch": 0.29, "learning_rate": 1.9758459453415014e-05, "loss": 0.0785, "step": 1253 }, { "epoch": 0.29, "learning_rate": 1.975790548421154e-05, "loss": 0.0873, "step": 1254 }, { "epoch": 0.29, "learning_rate": 1.9757350888259692e-05, "loss": 0.071, "step": 1255 }, { "epoch": 0.29, "learning_rate": 1.975679566559509e-05, "loss": 0.1199, "step": 1256 }, { "epoch": 0.3, "learning_rate": 1.9756239816253405e-05, "loss": 0.0956, "step": 1257 }, { "epoch": 0.3, "learning_rate": 1.9755683340270328e-05, "loss": 0.0926, "step": 1258 }, { "epoch": 0.3, "learning_rate": 1.9755126237681607e-05, "loss": 0.0467, "step": 1259 }, { "epoch": 0.3, "learning_rate": 1.9754568508523024e-05, "loss": 0.1667, "step": 1260 }, { "epoch": 0.3, "learning_rate": 1.97540101528304e-05, "loss": 0.0613, "step": 1261 }, { "epoch": 0.3, "learning_rate": 1.97534511706396e-05, "loss": 0.0656, "step": 1262 }, { "epoch": 0.3, "learning_rate": 1.975289156198653e-05, "loss": 0.0647, "step": 1263 }, { "epoch": 0.3, "learning_rate": 1.9752331326907127e-05, "loss": 0.0657, "step": 1264 }, { "epoch": 0.3, "learning_rate": 1.975177046543738e-05, "loss": 0.0446, "step": 1265 }, { "epoch": 0.3, "learning_rate": 1.975120897761331e-05, "loss": 0.1934, "step": 1266 }, { "epoch": 0.3, "learning_rate": 1.9750646863470983e-05, "loss": 0.1363, "step": 1267 }, { "epoch": 0.3, "learning_rate": 1.9750084123046503e-05, "loss": 0.0944, "step": 1268 }, { "epoch": 0.3, "learning_rate": 1.9749520756376016e-05, "loss": 0.1052, "step": 1269 }, { "epoch": 0.3, "learning_rate": 1.9748956763495705e-05, "loss": 0.0535, "step": 1270 }, { "epoch": 0.3, "learning_rate": 1.974839214444179e-05, "loss": 0.0305, "step": 1271 }, { "epoch": 0.3, "learning_rate": 1.974782689925055e-05, "loss": 0.0253, "step": 1272 }, { "epoch": 0.3, "learning_rate": 1.974726102795828e-05, "loss": 0.0719, "step": 1273 }, { "epoch": 0.3, "learning_rate": 1.9746694530601325e-05, "loss": 0.0954, "step": 1274 }, { "epoch": 0.3, "learning_rate": 1.974612740721608e-05, "loss": 0.0529, "step": 1275 }, { "epoch": 0.3, "learning_rate": 1.9745559657838964e-05, "loss": 0.1377, "step": 1276 }, { "epoch": 0.3, "learning_rate": 1.9744991282506444e-05, "loss": 0.0479, "step": 1277 }, { "epoch": 0.3, "learning_rate": 1.974442228125503e-05, "loss": 0.0824, "step": 1278 }, { "epoch": 0.3, "learning_rate": 1.9743852654121266e-05, "loss": 0.0515, "step": 1279 }, { "epoch": 0.3, "learning_rate": 1.9743282401141738e-05, "loss": 0.1423, "step": 1280 }, { "epoch": 0.3, "learning_rate": 1.9742711522353075e-05, "loss": 0.0418, "step": 1281 }, { "epoch": 0.3, "learning_rate": 1.9742140017791945e-05, "loss": 0.0947, "step": 1282 }, { "epoch": 0.3, "learning_rate": 1.9741567887495058e-05, "loss": 0.0175, "step": 1283 }, { "epoch": 0.3, "learning_rate": 1.9740995131499154e-05, "loss": 0.0614, "step": 1284 }, { "epoch": 0.3, "learning_rate": 1.9740421749841028e-05, "loss": 0.0711, "step": 1285 }, { "epoch": 0.3, "learning_rate": 1.9739847742557504e-05, "loss": 0.0593, "step": 1286 }, { "epoch": 0.3, "learning_rate": 1.9739273109685456e-05, "loss": 0.0504, "step": 1287 }, { "epoch": 0.3, "learning_rate": 1.9738697851261786e-05, "loss": 0.0145, "step": 1288 }, { "epoch": 0.3, "learning_rate": 1.9738121967323448e-05, "loss": 0.1488, "step": 1289 }, { "epoch": 0.3, "learning_rate": 1.973754545790743e-05, "loss": 0.2336, "step": 1290 }, { "epoch": 0.3, "learning_rate": 1.9736968323050756e-05, "loss": 0.1486, "step": 1291 }, { "epoch": 0.3, "learning_rate": 1.97363905627905e-05, "loss": 0.0666, "step": 1292 }, { "epoch": 0.3, "learning_rate": 1.9735812177163773e-05, "loss": 0.1207, "step": 1293 }, { "epoch": 0.3, "learning_rate": 1.9735233166207716e-05, "loss": 0.0731, "step": 1294 }, { "epoch": 0.3, "learning_rate": 1.973465352995953e-05, "loss": 0.0623, "step": 1295 }, { "epoch": 0.3, "learning_rate": 1.973407326845644e-05, "loss": 0.0836, "step": 1296 }, { "epoch": 0.3, "learning_rate": 1.973349238173571e-05, "loss": 0.0406, "step": 1297 }, { "epoch": 0.3, "learning_rate": 1.973291086983466e-05, "loss": 0.1393, "step": 1298 }, { "epoch": 0.3, "learning_rate": 1.9732328732790636e-05, "loss": 0.1214, "step": 1299 }, { "epoch": 0.31, "learning_rate": 1.973174597064103e-05, "loss": 0.0588, "step": 1300 }, { "epoch": 0.31, "learning_rate": 1.973116258342327e-05, "loss": 0.1703, "step": 1301 }, { "epoch": 0.31, "learning_rate": 1.9730578571174832e-05, "loss": 0.0668, "step": 1302 }, { "epoch": 0.31, "learning_rate": 1.972999393393322e-05, "loss": 0.0907, "step": 1303 }, { "epoch": 0.31, "learning_rate": 1.972940867173599e-05, "loss": 0.0536, "step": 1304 }, { "epoch": 0.31, "learning_rate": 1.972882278462073e-05, "loss": 0.0436, "step": 1305 }, { "epoch": 0.31, "learning_rate": 1.9728236272625073e-05, "loss": 0.0571, "step": 1306 }, { "epoch": 0.31, "learning_rate": 1.9727649135786694e-05, "loss": 0.1317, "step": 1307 }, { "epoch": 0.31, "learning_rate": 1.97270613741433e-05, "loss": 0.1249, "step": 1308 }, { "epoch": 0.31, "learning_rate": 1.9726472987732643e-05, "loss": 0.1129, "step": 1309 }, { "epoch": 0.31, "learning_rate": 1.972588397659252e-05, "loss": 0.1089, "step": 1310 }, { "epoch": 0.31, "learning_rate": 1.9725294340760754e-05, "loss": 0.1122, "step": 1311 }, { "epoch": 0.31, "learning_rate": 1.972470408027523e-05, "loss": 0.0731, "step": 1312 }, { "epoch": 0.31, "learning_rate": 1.9724113195173845e-05, "loss": 0.0685, "step": 1313 }, { "epoch": 0.31, "learning_rate": 1.972352168549456e-05, "loss": 0.0945, "step": 1314 }, { "epoch": 0.31, "learning_rate": 1.972292955127537e-05, "loss": 0.117, "step": 1315 }, { "epoch": 0.31, "learning_rate": 1.972233679255431e-05, "loss": 0.0727, "step": 1316 }, { "epoch": 0.31, "learning_rate": 1.9721743409369437e-05, "loss": 0.0373, "step": 1317 }, { "epoch": 0.31, "learning_rate": 1.9721149401758878e-05, "loss": 0.1242, "step": 1318 }, { "epoch": 0.31, "learning_rate": 1.9720554769760784e-05, "loss": 0.0393, "step": 1319 }, { "epoch": 0.31, "learning_rate": 1.9719959513413346e-05, "loss": 0.0378, "step": 1320 }, { "epoch": 0.31, "learning_rate": 1.9719363632754796e-05, "loss": 0.1442, "step": 1321 }, { "epoch": 0.31, "learning_rate": 1.971876712782341e-05, "loss": 0.0365, "step": 1322 }, { "epoch": 0.31, "learning_rate": 1.97181699986575e-05, "loss": 0.0559, "step": 1323 }, { "epoch": 0.31, "learning_rate": 1.971757224529542e-05, "loss": 0.0704, "step": 1324 }, { "epoch": 0.31, "learning_rate": 1.971697386777556e-05, "loss": 0.1053, "step": 1325 }, { "epoch": 0.31, "learning_rate": 1.971637486613636e-05, "loss": 0.0135, "step": 1326 }, { "epoch": 0.31, "learning_rate": 1.971577524041629e-05, "loss": 0.0672, "step": 1327 }, { "epoch": 0.31, "learning_rate": 1.9715174990653864e-05, "loss": 0.1625, "step": 1328 }, { "epoch": 0.31, "learning_rate": 1.9714574116887638e-05, "loss": 0.1803, "step": 1329 }, { "epoch": 0.31, "learning_rate": 1.9713972619156207e-05, "loss": 0.0871, "step": 1330 }, { "epoch": 0.31, "learning_rate": 1.9713370497498196e-05, "loss": 0.1134, "step": 1331 }, { "epoch": 0.31, "learning_rate": 1.971276775195229e-05, "loss": 0.141, "step": 1332 }, { "epoch": 0.31, "learning_rate": 1.9712164382557198e-05, "loss": 0.1524, "step": 1333 }, { "epoch": 0.31, "learning_rate": 1.9711560389351674e-05, "loss": 0.2064, "step": 1334 }, { "epoch": 0.31, "learning_rate": 1.9710955772374513e-05, "loss": 0.1369, "step": 1335 }, { "epoch": 0.31, "learning_rate": 1.9710350531664556e-05, "loss": 0.1447, "step": 1336 }, { "epoch": 0.31, "learning_rate": 1.9709744667260662e-05, "loss": 0.1404, "step": 1337 }, { "epoch": 0.31, "learning_rate": 1.9709138179201765e-05, "loss": 0.0295, "step": 1338 }, { "epoch": 0.31, "learning_rate": 1.9708531067526802e-05, "loss": 0.084, "step": 1339 }, { "epoch": 0.31, "learning_rate": 1.970792333227478e-05, "loss": 0.0909, "step": 1340 }, { "epoch": 0.31, "learning_rate": 1.9707314973484726e-05, "loss": 0.0488, "step": 1341 }, { "epoch": 0.31, "learning_rate": 1.970670599119572e-05, "loss": 0.0629, "step": 1342 }, { "epoch": 0.32, "learning_rate": 1.9706096385446873e-05, "loss": 0.1384, "step": 1343 }, { "epoch": 0.32, "learning_rate": 1.9705486156277343e-05, "loss": 0.1313, "step": 1344 }, { "epoch": 0.32, "learning_rate": 1.9704875303726323e-05, "loss": 0.013, "step": 1345 }, { "epoch": 0.32, "learning_rate": 1.9704263827833047e-05, "loss": 0.0477, "step": 1346 }, { "epoch": 0.32, "learning_rate": 1.970365172863679e-05, "loss": 0.0382, "step": 1347 }, { "epoch": 0.32, "learning_rate": 1.9703039006176874e-05, "loss": 0.0663, "step": 1348 }, { "epoch": 0.32, "learning_rate": 1.9702425660492644e-05, "loss": 0.1258, "step": 1349 }, { "epoch": 0.32, "learning_rate": 1.97018116916235e-05, "loss": 0.0287, "step": 1350 }, { "epoch": 0.32, "learning_rate": 1.970119709960888e-05, "loss": 0.0656, "step": 1351 }, { "epoch": 0.32, "learning_rate": 1.970058188448825e-05, "loss": 0.0616, "step": 1352 }, { "epoch": 0.32, "learning_rate": 1.9699966046301136e-05, "loss": 0.0594, "step": 1353 }, { "epoch": 0.32, "learning_rate": 1.9699349585087084e-05, "loss": 0.0402, "step": 1354 }, { "epoch": 0.32, "learning_rate": 1.9698732500885695e-05, "loss": 0.3007, "step": 1355 }, { "epoch": 0.32, "learning_rate": 1.9698114793736606e-05, "loss": 0.0943, "step": 1356 }, { "epoch": 0.32, "learning_rate": 1.969749646367948e-05, "loss": 0.1124, "step": 1357 }, { "epoch": 0.32, "learning_rate": 1.9696877510754045e-05, "loss": 0.0992, "step": 1358 }, { "epoch": 0.32, "learning_rate": 1.9696257935000055e-05, "loss": 0.0334, "step": 1359 }, { "epoch": 0.32, "learning_rate": 1.96956377364573e-05, "loss": 0.051, "step": 1360 }, { "epoch": 0.32, "learning_rate": 1.9695016915165615e-05, "loss": 0.031, "step": 1361 }, { "epoch": 0.32, "learning_rate": 1.969439547116488e-05, "loss": 0.0616, "step": 1362 }, { "epoch": 0.32, "learning_rate": 1.969377340449501e-05, "loss": 0.0587, "step": 1363 }, { "epoch": 0.32, "learning_rate": 1.969315071519596e-05, "loss": 0.0845, "step": 1364 }, { "epoch": 0.32, "learning_rate": 1.9692527403307716e-05, "loss": 0.1262, "step": 1365 }, { "epoch": 0.32, "learning_rate": 1.9691903468870326e-05, "loss": 0.0414, "step": 1366 }, { "epoch": 0.32, "learning_rate": 1.9691278911923856e-05, "loss": 0.082, "step": 1367 }, { "epoch": 0.32, "learning_rate": 1.969065373250843e-05, "loss": 0.0175, "step": 1368 }, { "epoch": 0.32, "learning_rate": 1.9690027930664196e-05, "loss": 0.0869, "step": 1369 }, { "epoch": 0.32, "learning_rate": 1.968940150643135e-05, "loss": 0.0458, "step": 1370 }, { "epoch": 0.32, "learning_rate": 1.9688774459850127e-05, "loss": 0.0351, "step": 1371 }, { "epoch": 0.32, "learning_rate": 1.9688146790960804e-05, "loss": 0.1271, "step": 1372 }, { "epoch": 0.32, "learning_rate": 1.9687518499803696e-05, "loss": 0.0628, "step": 1373 }, { "epoch": 0.32, "learning_rate": 1.968688958641916e-05, "loss": 0.0986, "step": 1374 }, { "epoch": 0.32, "learning_rate": 1.9686260050847586e-05, "loss": 0.0246, "step": 1375 }, { "epoch": 0.32, "learning_rate": 1.968562989312941e-05, "loss": 0.0596, "step": 1376 }, { "epoch": 0.32, "learning_rate": 1.9684999113305113e-05, "loss": 0.0399, "step": 1377 }, { "epoch": 0.32, "learning_rate": 1.96843677114152e-05, "loss": 0.0325, "step": 1378 }, { "epoch": 0.32, "learning_rate": 1.9683735687500236e-05, "loss": 0.0773, "step": 1379 }, { "epoch": 0.32, "learning_rate": 1.968310304160081e-05, "loss": 0.0943, "step": 1380 }, { "epoch": 0.32, "learning_rate": 1.9682469773757552e-05, "loss": 0.0344, "step": 1381 }, { "epoch": 0.32, "learning_rate": 1.968183588401115e-05, "loss": 0.0997, "step": 1382 }, { "epoch": 0.32, "learning_rate": 1.9681201372402304e-05, "loss": 0.1497, "step": 1383 }, { "epoch": 0.32, "learning_rate": 1.968056623897178e-05, "loss": 0.0677, "step": 1384 }, { "epoch": 0.33, "learning_rate": 1.9679930483760367e-05, "loss": 0.1472, "step": 1385 }, { "epoch": 0.33, "learning_rate": 1.9679294106808896e-05, "loss": 0.2015, "step": 1386 }, { "epoch": 0.33, "learning_rate": 1.967865710815825e-05, "loss": 0.1141, "step": 1387 }, { "epoch": 0.33, "learning_rate": 1.9678019487849336e-05, "loss": 0.0783, "step": 1388 }, { "epoch": 0.33, "learning_rate": 1.9677381245923115e-05, "loss": 0.156, "step": 1389 }, { "epoch": 0.33, "learning_rate": 1.9676742382420575e-05, "loss": 0.1378, "step": 1390 }, { "epoch": 0.33, "learning_rate": 1.9676102897382752e-05, "loss": 0.1083, "step": 1391 }, { "epoch": 0.33, "learning_rate": 1.9675462790850723e-05, "loss": 0.0764, "step": 1392 }, { "epoch": 0.33, "learning_rate": 1.9674822062865596e-05, "loss": 0.0811, "step": 1393 }, { "epoch": 0.33, "learning_rate": 1.967418071346853e-05, "loss": 0.0792, "step": 1394 }, { "epoch": 0.33, "learning_rate": 1.967353874270072e-05, "loss": 0.0546, "step": 1395 }, { "epoch": 0.33, "learning_rate": 1.967289615060339e-05, "loss": 0.045, "step": 1396 }, { "epoch": 0.33, "learning_rate": 1.9672252937217826e-05, "loss": 0.1113, "step": 1397 }, { "epoch": 0.33, "learning_rate": 1.967160910258533e-05, "loss": 0.1009, "step": 1398 }, { "epoch": 0.33, "learning_rate": 1.9670964646747267e-05, "loss": 0.1111, "step": 1399 }, { "epoch": 0.33, "learning_rate": 1.967031956974502e-05, "loss": 0.0821, "step": 1400 }, { "epoch": 0.33, "learning_rate": 1.966967387162003e-05, "loss": 0.1107, "step": 1401 }, { "epoch": 0.33, "learning_rate": 1.966902755241376e-05, "loss": 0.0555, "step": 1402 }, { "epoch": 0.33, "learning_rate": 1.9668380612167738e-05, "loss": 0.07, "step": 1403 }, { "epoch": 0.33, "learning_rate": 1.96677330509235e-05, "loss": 0.1268, "step": 1404 }, { "epoch": 0.33, "learning_rate": 1.9667084868722652e-05, "loss": 0.0743, "step": 1405 }, { "epoch": 0.33, "learning_rate": 1.9666436065606822e-05, "loss": 0.0659, "step": 1406 }, { "epoch": 0.33, "learning_rate": 1.9665786641617682e-05, "loss": 0.059, "step": 1407 }, { "epoch": 0.33, "learning_rate": 1.9665136596796942e-05, "loss": 0.0579, "step": 1408 }, { "epoch": 0.33, "learning_rate": 1.9664485931186355e-05, "loss": 0.1695, "step": 1409 }, { "epoch": 0.33, "learning_rate": 1.966383464482772e-05, "loss": 0.069, "step": 1410 }, { "epoch": 0.33, "learning_rate": 1.966318273776286e-05, "loss": 0.1221, "step": 1411 }, { "epoch": 0.33, "learning_rate": 1.9662530210033653e-05, "loss": 0.0965, "step": 1412 }, { "epoch": 0.33, "learning_rate": 1.9661877061682013e-05, "loss": 0.1389, "step": 1413 }, { "epoch": 0.33, "learning_rate": 1.9661223292749878e-05, "loss": 0.071, "step": 1414 }, { "epoch": 0.33, "learning_rate": 1.9660568903279254e-05, "loss": 0.0658, "step": 1415 }, { "epoch": 0.33, "learning_rate": 1.9659913893312165e-05, "loss": 0.0521, "step": 1416 }, { "epoch": 0.33, "learning_rate": 1.9659258262890683e-05, "loss": 0.0481, "step": 1417 }, { "epoch": 0.33, "learning_rate": 1.9658602012056925e-05, "loss": 0.0612, "step": 1418 }, { "epoch": 0.33, "learning_rate": 1.965794514085303e-05, "loss": 0.0937, "step": 1419 }, { "epoch": 0.33, "learning_rate": 1.96572876493212e-05, "loss": 0.0787, "step": 1420 }, { "epoch": 0.33, "learning_rate": 1.965662953750366e-05, "loss": 0.1341, "step": 1421 }, { "epoch": 0.33, "learning_rate": 1.965597080544268e-05, "loss": 0.0769, "step": 1422 }, { "epoch": 0.33, "learning_rate": 1.9655311453180575e-05, "loss": 0.0554, "step": 1423 }, { "epoch": 0.33, "learning_rate": 1.965465148075969e-05, "loss": 0.028, "step": 1424 }, { "epoch": 0.33, "learning_rate": 1.965399088822242e-05, "loss": 0.0714, "step": 1425 }, { "epoch": 0.33, "learning_rate": 1.9653329675611185e-05, "loss": 0.09, "step": 1426 }, { "epoch": 0.33, "learning_rate": 1.9652667842968465e-05, "loss": 0.0802, "step": 1427 }, { "epoch": 0.34, "learning_rate": 1.9652005390336765e-05, "loss": 0.0902, "step": 1428 }, { "epoch": 0.34, "learning_rate": 1.9651342317758634e-05, "loss": 0.0407, "step": 1429 }, { "epoch": 0.34, "learning_rate": 1.9650678625276663e-05, "loss": 0.0998, "step": 1430 }, { "epoch": 0.34, "learning_rate": 1.9650014312933478e-05, "loss": 0.0994, "step": 1431 }, { "epoch": 0.34, "learning_rate": 1.964934938077175e-05, "loss": 0.0968, "step": 1432 }, { "epoch": 0.34, "learning_rate": 1.9648683828834187e-05, "loss": 0.0496, "step": 1433 }, { "epoch": 0.34, "learning_rate": 1.9648017657163535e-05, "loss": 0.057, "step": 1434 }, { "epoch": 0.34, "learning_rate": 1.964735086580259e-05, "loss": 0.0635, "step": 1435 }, { "epoch": 0.34, "learning_rate": 1.9646683454794164e-05, "loss": 0.0429, "step": 1436 }, { "epoch": 0.34, "learning_rate": 1.9646015424181143e-05, "loss": 0.0339, "step": 1437 }, { "epoch": 0.34, "learning_rate": 1.964534677400642e-05, "loss": 0.0491, "step": 1438 }, { "epoch": 0.34, "learning_rate": 1.964467750431295e-05, "loss": 0.1141, "step": 1439 }, { "epoch": 0.34, "learning_rate": 1.964400761514372e-05, "loss": 0.1221, "step": 1440 }, { "epoch": 0.34, "learning_rate": 1.9643337106541756e-05, "loss": 0.0713, "step": 1441 }, { "epoch": 0.34, "learning_rate": 1.964266597855012e-05, "loss": 0.1152, "step": 1442 }, { "epoch": 0.34, "learning_rate": 1.9641994231211923e-05, "loss": 0.0805, "step": 1443 }, { "epoch": 0.34, "learning_rate": 1.9641321864570312e-05, "loss": 0.0853, "step": 1444 }, { "epoch": 0.34, "learning_rate": 1.9640648878668473e-05, "loss": 0.0677, "step": 1445 }, { "epoch": 0.34, "learning_rate": 1.963997527354963e-05, "loss": 0.0908, "step": 1446 }, { "epoch": 0.34, "learning_rate": 1.9639301049257046e-05, "loss": 0.0691, "step": 1447 }, { "epoch": 0.34, "learning_rate": 1.9638626205834027e-05, "loss": 0.0302, "step": 1448 }, { "epoch": 0.34, "learning_rate": 1.9637950743323924e-05, "loss": 0.1307, "step": 1449 }, { "epoch": 0.34, "learning_rate": 1.9637274661770116e-05, "loss": 0.0826, "step": 1450 }, { "epoch": 0.34, "learning_rate": 1.963659796121603e-05, "loss": 0.0667, "step": 1451 }, { "epoch": 0.34, "learning_rate": 1.963592064170513e-05, "loss": 0.0742, "step": 1452 }, { "epoch": 0.34, "learning_rate": 1.9635242703280922e-05, "loss": 0.088, "step": 1453 }, { "epoch": 0.34, "learning_rate": 1.9634564145986947e-05, "loss": 0.0556, "step": 1454 }, { "epoch": 0.34, "learning_rate": 1.9633884969866783e-05, "loss": 0.0859, "step": 1455 }, { "epoch": 0.34, "learning_rate": 1.9633205174964068e-05, "loss": 0.0967, "step": 1456 }, { "epoch": 0.34, "learning_rate": 1.963252476132245e-05, "loss": 0.0898, "step": 1457 }, { "epoch": 0.34, "learning_rate": 1.9631843728985643e-05, "loss": 0.0532, "step": 1458 }, { "epoch": 0.34, "learning_rate": 1.9631162077997384e-05, "loss": 0.0765, "step": 1459 }, { "epoch": 0.34, "learning_rate": 1.9630479808401455e-05, "loss": 0.1225, "step": 1460 }, { "epoch": 0.34, "learning_rate": 1.9629796920241685e-05, "loss": 0.0946, "step": 1461 }, { "epoch": 0.34, "learning_rate": 1.9629113413561926e-05, "loss": 0.1435, "step": 1462 }, { "epoch": 0.34, "learning_rate": 1.9628429288406085e-05, "loss": 0.0621, "step": 1463 }, { "epoch": 0.34, "learning_rate": 1.9627744544818102e-05, "loss": 0.0344, "step": 1464 }, { "epoch": 0.34, "learning_rate": 1.9627059182841957e-05, "loss": 0.1638, "step": 1465 }, { "epoch": 0.34, "learning_rate": 1.9626373202521673e-05, "loss": 0.1017, "step": 1466 }, { "epoch": 0.34, "learning_rate": 1.9625686603901304e-05, "loss": 0.1381, "step": 1467 }, { "epoch": 0.34, "learning_rate": 1.9624999387024963e-05, "loss": 0.0552, "step": 1468 }, { "epoch": 0.34, "learning_rate": 1.9624311551936776e-05, "loss": 0.1046, "step": 1469 }, { "epoch": 0.34, "learning_rate": 1.962362309868093e-05, "loss": 0.1257, "step": 1470 }, { "epoch": 0.35, "learning_rate": 1.962293402730164e-05, "loss": 0.0682, "step": 1471 }, { "epoch": 0.35, "learning_rate": 1.9622244337843174e-05, "loss": 0.0634, "step": 1472 }, { "epoch": 0.35, "learning_rate": 1.9621554030349824e-05, "loss": 0.0738, "step": 1473 }, { "epoch": 0.35, "learning_rate": 1.9620863104865924e-05, "loss": 0.1336, "step": 1474 }, { "epoch": 0.35, "learning_rate": 1.962017156143586e-05, "loss": 0.0481, "step": 1475 }, { "epoch": 0.35, "learning_rate": 1.9619479400104043e-05, "loss": 0.0599, "step": 1476 }, { "epoch": 0.35, "learning_rate": 1.9618786620914935e-05, "loss": 0.057, "step": 1477 }, { "epoch": 0.35, "learning_rate": 1.9618093223913033e-05, "loss": 0.0459, "step": 1478 }, { "epoch": 0.35, "learning_rate": 1.961739920914287e-05, "loss": 0.0531, "step": 1479 }, { "epoch": 0.35, "learning_rate": 1.9616704576649028e-05, "loss": 0.0844, "step": 1480 }, { "epoch": 0.35, "learning_rate": 1.9616009326476118e-05, "loss": 0.05, "step": 1481 }, { "epoch": 0.35, "learning_rate": 1.9615313458668796e-05, "loss": 0.1943, "step": 1482 }, { "epoch": 0.35, "learning_rate": 1.9614616973271764e-05, "loss": 0.2691, "step": 1483 }, { "epoch": 0.35, "learning_rate": 1.961391987032975e-05, "loss": 0.0943, "step": 1484 }, { "epoch": 0.35, "learning_rate": 1.961322214988753e-05, "loss": 0.1177, "step": 1485 }, { "epoch": 0.35, "learning_rate": 1.9612523811989922e-05, "loss": 0.1182, "step": 1486 }, { "epoch": 0.35, "learning_rate": 1.9611824856681774e-05, "loss": 0.0894, "step": 1487 }, { "epoch": 0.35, "learning_rate": 1.9611125284007987e-05, "loss": 0.0505, "step": 1488 }, { "epoch": 0.35, "learning_rate": 1.9610425094013487e-05, "loss": 0.0764, "step": 1489 }, { "epoch": 0.35, "learning_rate": 1.9609724286743253e-05, "loss": 0.0505, "step": 1490 }, { "epoch": 0.35, "learning_rate": 1.96090228622423e-05, "loss": 0.178, "step": 1491 }, { "epoch": 0.35, "learning_rate": 1.9608320820555674e-05, "loss": 0.1035, "step": 1492 }, { "epoch": 0.35, "learning_rate": 1.9607618161728467e-05, "loss": 0.1251, "step": 1493 }, { "epoch": 0.35, "learning_rate": 1.960691488580581e-05, "loss": 0.0641, "step": 1494 }, { "epoch": 0.35, "learning_rate": 1.960621099283288e-05, "loss": 0.0808, "step": 1495 }, { "epoch": 0.35, "learning_rate": 1.9605506482854886e-05, "loss": 0.02, "step": 1496 }, { "epoch": 0.35, "learning_rate": 1.9604801355917076e-05, "loss": 0.1586, "step": 1497 }, { "epoch": 0.35, "learning_rate": 1.960409561206474e-05, "loss": 0.0509, "step": 1498 }, { "epoch": 0.35, "learning_rate": 1.9603389251343213e-05, "loss": 0.1209, "step": 1499 }, { "epoch": 0.35, "learning_rate": 1.960268227379786e-05, "loss": 0.0566, "step": 1500 }, { "epoch": 0.35, "learning_rate": 1.960197467947409e-05, "loss": 0.0722, "step": 1501 }, { "epoch": 0.35, "learning_rate": 1.9601266468417353e-05, "loss": 0.0755, "step": 1502 }, { "epoch": 0.35, "learning_rate": 1.960055764067313e-05, "loss": 0.0852, "step": 1503 }, { "epoch": 0.35, "learning_rate": 1.9599848196286966e-05, "loss": 0.0726, "step": 1504 }, { "epoch": 0.35, "learning_rate": 1.9599138135304414e-05, "loss": 0.0561, "step": 1505 }, { "epoch": 0.35, "learning_rate": 1.9598427457771085e-05, "loss": 0.0343, "step": 1506 }, { "epoch": 0.35, "learning_rate": 1.959771616373262e-05, "loss": 0.105, "step": 1507 }, { "epoch": 0.35, "learning_rate": 1.959700425323472e-05, "loss": 0.0676, "step": 1508 }, { "epoch": 0.35, "learning_rate": 1.95962917263231e-05, "loss": 0.0472, "step": 1509 }, { "epoch": 0.35, "learning_rate": 1.9595578583043524e-05, "loss": 0.0547, "step": 1510 }, { "epoch": 0.35, "learning_rate": 1.9594864823441805e-05, "loss": 0.1652, "step": 1511 }, { "epoch": 0.35, "learning_rate": 1.959415044756378e-05, "loss": 0.0729, "step": 1512 }, { "epoch": 0.36, "learning_rate": 1.9593435455455335e-05, "loss": 0.0851, "step": 1513 }, { "epoch": 0.36, "learning_rate": 1.95927198471624e-05, "loss": 0.0948, "step": 1514 }, { "epoch": 0.36, "learning_rate": 1.9592003622730928e-05, "loss": 0.1029, "step": 1515 }, { "epoch": 0.36, "learning_rate": 1.959128678220693e-05, "loss": 0.05, "step": 1516 }, { "epoch": 0.36, "learning_rate": 1.9590569325636447e-05, "loss": 0.0307, "step": 1517 }, { "epoch": 0.36, "learning_rate": 1.9589851253065556e-05, "loss": 0.0945, "step": 1518 }, { "epoch": 0.36, "learning_rate": 1.958913256454039e-05, "loss": 0.0426, "step": 1519 }, { "epoch": 0.36, "learning_rate": 1.9588413260107097e-05, "loss": 0.0778, "step": 1520 }, { "epoch": 0.36, "learning_rate": 1.9587693339811885e-05, "loss": 0.0346, "step": 1521 }, { "epoch": 0.36, "learning_rate": 1.9586972803700996e-05, "loss": 0.1107, "step": 1522 }, { "epoch": 0.36, "learning_rate": 1.9586251651820705e-05, "loss": 0.0742, "step": 1523 }, { "epoch": 0.36, "learning_rate": 1.958552988421733e-05, "loss": 0.0637, "step": 1524 }, { "epoch": 0.36, "learning_rate": 1.958480750093724e-05, "loss": 0.0847, "step": 1525 }, { "epoch": 0.36, "learning_rate": 1.9584084502026823e-05, "loss": 0.0253, "step": 1526 }, { "epoch": 0.36, "learning_rate": 1.958336088753252e-05, "loss": 0.0655, "step": 1527 }, { "epoch": 0.36, "learning_rate": 1.958263665750081e-05, "loss": 0.0948, "step": 1528 }, { "epoch": 0.36, "learning_rate": 1.9581911811978214e-05, "loss": 0.0762, "step": 1529 }, { "epoch": 0.36, "learning_rate": 1.958118635101128e-05, "loss": 0.1315, "step": 1530 }, { "epoch": 0.36, "learning_rate": 1.9580460274646612e-05, "loss": 0.114, "step": 1531 }, { "epoch": 0.36, "learning_rate": 1.957973358293084e-05, "loss": 0.0259, "step": 1532 }, { "epoch": 0.36, "learning_rate": 1.9579006275910645e-05, "loss": 0.1777, "step": 1533 }, { "epoch": 0.36, "learning_rate": 1.9578278353632734e-05, "loss": 0.1293, "step": 1534 }, { "epoch": 0.36, "learning_rate": 1.957754981614387e-05, "loss": 0.1171, "step": 1535 }, { "epoch": 0.36, "learning_rate": 1.9576820663490838e-05, "loss": 0.145, "step": 1536 }, { "epoch": 0.36, "learning_rate": 1.9576090895720482e-05, "loss": 0.0624, "step": 1537 }, { "epoch": 0.36, "learning_rate": 1.9575360512879665e-05, "loss": 0.077, "step": 1538 }, { "epoch": 0.36, "learning_rate": 1.9574629515015304e-05, "loss": 0.0522, "step": 1539 }, { "epoch": 0.36, "learning_rate": 1.957389790217435e-05, "loss": 0.0813, "step": 1540 }, { "epoch": 0.36, "learning_rate": 1.9573165674403792e-05, "loss": 0.1579, "step": 1541 }, { "epoch": 0.36, "learning_rate": 1.9572432831750667e-05, "loss": 0.1419, "step": 1542 }, { "epoch": 0.36, "learning_rate": 1.9571699374262038e-05, "loss": 0.0255, "step": 1543 }, { "epoch": 0.36, "learning_rate": 1.9570965301985024e-05, "loss": 0.0834, "step": 1544 }, { "epoch": 0.36, "learning_rate": 1.9570230614966763e-05, "loss": 0.153, "step": 1545 }, { "epoch": 0.36, "learning_rate": 1.9569495313254454e-05, "loss": 0.0605, "step": 1546 }, { "epoch": 0.36, "learning_rate": 1.956875939689532e-05, "loss": 0.0823, "step": 1547 }, { "epoch": 0.36, "learning_rate": 1.9568022865936625e-05, "loss": 0.0722, "step": 1548 }, { "epoch": 0.36, "learning_rate": 1.9567285720425686e-05, "loss": 0.0706, "step": 1549 }, { "epoch": 0.36, "learning_rate": 1.9566547960409844e-05, "loss": 0.0871, "step": 1550 }, { "epoch": 0.36, "learning_rate": 1.956580958593648e-05, "loss": 0.198, "step": 1551 }, { "epoch": 0.36, "learning_rate": 1.9565070597053034e-05, "loss": 0.1504, "step": 1552 }, { "epoch": 0.36, "learning_rate": 1.9564330993806956e-05, "loss": 0.129, "step": 1553 }, { "epoch": 0.36, "learning_rate": 1.9563590776245756e-05, "loss": 0.1687, "step": 1554 }, { "epoch": 0.36, "learning_rate": 1.9562849944416985e-05, "loss": 0.0981, "step": 1555 }, { "epoch": 0.37, "learning_rate": 1.956210849836822e-05, "loss": 0.1416, "step": 1556 }, { "epoch": 0.37, "learning_rate": 1.9561366438147083e-05, "loss": 0.0715, "step": 1557 }, { "epoch": 0.37, "learning_rate": 1.9560623763801233e-05, "loss": 0.0754, "step": 1558 }, { "epoch": 0.37, "learning_rate": 1.9559880475378384e-05, "loss": 0.1161, "step": 1559 }, { "epoch": 0.37, "learning_rate": 1.9559136572926264e-05, "loss": 0.2739, "step": 1560 }, { "epoch": 0.37, "learning_rate": 1.9558392056492666e-05, "loss": 0.1545, "step": 1561 }, { "epoch": 0.37, "learning_rate": 1.95576469261254e-05, "loss": 0.0695, "step": 1562 }, { "epoch": 0.37, "learning_rate": 1.955690118187233e-05, "loss": 0.0791, "step": 1563 }, { "epoch": 0.37, "learning_rate": 1.9556154823781356e-05, "loss": 0.0564, "step": 1564 }, { "epoch": 0.37, "learning_rate": 1.9555407851900414e-05, "loss": 0.0919, "step": 1565 }, { "epoch": 0.37, "learning_rate": 1.955466026627748e-05, "loss": 0.0524, "step": 1566 }, { "epoch": 0.37, "learning_rate": 1.9553912066960576e-05, "loss": 0.0849, "step": 1567 }, { "epoch": 0.37, "learning_rate": 1.9553163253997757e-05, "loss": 0.0821, "step": 1568 }, { "epoch": 0.37, "learning_rate": 1.955241382743712e-05, "loss": 0.0285, "step": 1569 }, { "epoch": 0.37, "learning_rate": 1.9551663787326798e-05, "loss": 0.1538, "step": 1570 }, { "epoch": 0.37, "learning_rate": 1.9550913133714967e-05, "loss": 0.1262, "step": 1571 }, { "epoch": 0.37, "learning_rate": 1.9550161866649842e-05, "loss": 0.1538, "step": 1572 }, { "epoch": 0.37, "learning_rate": 1.9549409986179675e-05, "loss": 0.075, "step": 1573 }, { "epoch": 0.37, "learning_rate": 1.954865749235276e-05, "loss": 0.0484, "step": 1574 }, { "epoch": 0.37, "learning_rate": 1.9547904385217432e-05, "loss": 0.0629, "step": 1575 }, { "epoch": 0.37, "learning_rate": 1.9547150664822064e-05, "loss": 0.0635, "step": 1576 }, { "epoch": 0.37, "learning_rate": 1.954639633121506e-05, "loss": 0.0568, "step": 1577 }, { "epoch": 0.37, "learning_rate": 1.9545641384444877e-05, "loss": 0.0748, "step": 1578 }, { "epoch": 0.37, "learning_rate": 1.954488582456e-05, "loss": 0.0574, "step": 1579 }, { "epoch": 0.37, "learning_rate": 1.9544129651608962e-05, "loss": 0.0161, "step": 1580 }, { "epoch": 0.37, "learning_rate": 1.9543372865640333e-05, "loss": 0.0554, "step": 1581 }, { "epoch": 0.37, "learning_rate": 1.9542615466702722e-05, "loss": 0.068, "step": 1582 }, { "epoch": 0.37, "learning_rate": 1.954185745484477e-05, "loss": 0.0932, "step": 1583 }, { "epoch": 0.37, "learning_rate": 1.9541098830115168e-05, "loss": 0.0582, "step": 1584 }, { "epoch": 0.37, "learning_rate": 1.9540339592562644e-05, "loss": 0.1034, "step": 1585 }, { "epoch": 0.37, "learning_rate": 1.9539579742235962e-05, "loss": 0.0503, "step": 1586 }, { "epoch": 0.37, "learning_rate": 1.9538819279183928e-05, "loss": 0.0768, "step": 1587 }, { "epoch": 0.37, "learning_rate": 1.9538058203455383e-05, "loss": 0.1469, "step": 1588 }, { "epoch": 0.37, "learning_rate": 1.9537296515099217e-05, "loss": 0.1044, "step": 1589 }, { "epoch": 0.37, "learning_rate": 1.9536534214164347e-05, "loss": 0.0731, "step": 1590 }, { "epoch": 0.37, "learning_rate": 1.9535771300699736e-05, "loss": 0.0908, "step": 1591 }, { "epoch": 0.37, "learning_rate": 1.9535007774754394e-05, "loss": 0.0898, "step": 1592 }, { "epoch": 0.37, "learning_rate": 1.953424363637735e-05, "loss": 0.139, "step": 1593 }, { "epoch": 0.37, "learning_rate": 1.9533478885617692e-05, "loss": 0.0622, "step": 1594 }, { "epoch": 0.37, "learning_rate": 1.9532713522524534e-05, "loss": 0.0971, "step": 1595 }, { "epoch": 0.37, "learning_rate": 1.9531947547147045e-05, "loss": 0.1046, "step": 1596 }, { "epoch": 0.37, "learning_rate": 1.9531180959534415e-05, "loss": 0.1315, "step": 1597 }, { "epoch": 0.38, "learning_rate": 1.9530413759735886e-05, "loss": 0.1124, "step": 1598 }, { "epoch": 0.38, "learning_rate": 1.952964594780073e-05, "loss": 0.071, "step": 1599 }, { "epoch": 0.38, "learning_rate": 1.952887752377827e-05, "loss": 0.0761, "step": 1600 }, { "epoch": 0.38, "learning_rate": 1.952810848771786e-05, "loss": 0.0923, "step": 1601 }, { "epoch": 0.38, "learning_rate": 1.952733883966889e-05, "loss": 0.1108, "step": 1602 }, { "epoch": 0.38, "learning_rate": 1.9526568579680797e-05, "loss": 0.0316, "step": 1603 }, { "epoch": 0.38, "learning_rate": 1.952579770780306e-05, "loss": 0.0328, "step": 1604 }, { "epoch": 0.38, "learning_rate": 1.9525026224085183e-05, "loss": 0.0394, "step": 1605 }, { "epoch": 0.38, "learning_rate": 1.9524254128576724e-05, "loss": 0.2165, "step": 1606 }, { "epoch": 0.38, "learning_rate": 1.9523481421327275e-05, "loss": 0.0369, "step": 1607 }, { "epoch": 0.38, "learning_rate": 1.9522708102386467e-05, "loss": 0.0422, "step": 1608 }, { "epoch": 0.38, "learning_rate": 1.9521934171803964e-05, "loss": 0.0537, "step": 1609 }, { "epoch": 0.38, "learning_rate": 1.952115962962948e-05, "loss": 0.023, "step": 1610 }, { "epoch": 0.38, "learning_rate": 1.9520384475912766e-05, "loss": 0.0596, "step": 1611 }, { "epoch": 0.38, "learning_rate": 1.9519608710703605e-05, "loss": 0.1165, "step": 1612 }, { "epoch": 0.38, "learning_rate": 1.9518832334051828e-05, "loss": 0.0673, "step": 1613 }, { "epoch": 0.38, "learning_rate": 1.95180553460073e-05, "loss": 0.1135, "step": 1614 }, { "epoch": 0.38, "learning_rate": 1.9517277746619926e-05, "loss": 0.1322, "step": 1615 }, { "epoch": 0.38, "learning_rate": 1.9516499535939653e-05, "loss": 0.0977, "step": 1616 }, { "epoch": 0.38, "learning_rate": 1.9515720714016464e-05, "loss": 0.0454, "step": 1617 }, { "epoch": 0.38, "learning_rate": 1.951494128090038e-05, "loss": 0.0943, "step": 1618 }, { "epoch": 0.38, "learning_rate": 1.951416123664147e-05, "loss": 0.1253, "step": 1619 }, { "epoch": 0.38, "learning_rate": 1.9513380581289835e-05, "loss": 0.0362, "step": 1620 }, { "epoch": 0.38, "learning_rate": 1.9512599314895612e-05, "loss": 0.1369, "step": 1621 }, { "epoch": 0.38, "learning_rate": 1.9511817437508985e-05, "loss": 0.0855, "step": 1622 }, { "epoch": 0.38, "learning_rate": 1.9511034949180167e-05, "loss": 0.1714, "step": 1623 }, { "epoch": 0.38, "learning_rate": 1.9510251849959427e-05, "loss": 0.0848, "step": 1624 }, { "epoch": 0.38, "learning_rate": 1.9509468139897062e-05, "loss": 0.0353, "step": 1625 }, { "epoch": 0.38, "learning_rate": 1.9508683819043403e-05, "loss": 0.1225, "step": 1626 }, { "epoch": 0.38, "learning_rate": 1.9507898887448827e-05, "loss": 0.0839, "step": 1627 }, { "epoch": 0.38, "learning_rate": 1.9507113345163758e-05, "loss": 0.1497, "step": 1628 }, { "epoch": 0.38, "learning_rate": 1.9506327192238644e-05, "loss": 0.1302, "step": 1629 }, { "epoch": 0.38, "learning_rate": 1.9505540428723982e-05, "loss": 0.0666, "step": 1630 }, { "epoch": 0.38, "learning_rate": 1.950475305467031e-05, "loss": 0.0961, "step": 1631 }, { "epoch": 0.38, "learning_rate": 1.950396507012819e-05, "loss": 0.0695, "step": 1632 }, { "epoch": 0.38, "learning_rate": 1.9503176475148245e-05, "loss": 0.1213, "step": 1633 }, { "epoch": 0.38, "learning_rate": 1.950238726978112e-05, "loss": 0.0914, "step": 1634 }, { "epoch": 0.38, "learning_rate": 1.950159745407751e-05, "loss": 0.0588, "step": 1635 }, { "epoch": 0.38, "learning_rate": 1.9500807028088138e-05, "loss": 0.0829, "step": 1636 }, { "epoch": 0.38, "learning_rate": 1.950001599186378e-05, "loss": 0.1135, "step": 1637 }, { "epoch": 0.38, "learning_rate": 1.9499224345455242e-05, "loss": 0.1209, "step": 1638 }, { "epoch": 0.38, "learning_rate": 1.9498432088913367e-05, "loss": 0.0236, "step": 1639 }, { "epoch": 0.38, "learning_rate": 1.949763922228905e-05, "loss": 0.0978, "step": 1640 }, { "epoch": 0.39, "learning_rate": 1.9496845745633206e-05, "loss": 0.109, "step": 1641 }, { "epoch": 0.39, "learning_rate": 1.9496051658996807e-05, "loss": 0.0724, "step": 1642 }, { "epoch": 0.39, "learning_rate": 1.949525696243086e-05, "loss": 0.0533, "step": 1643 }, { "epoch": 0.39, "learning_rate": 1.94944616559864e-05, "loss": 0.0978, "step": 1644 }, { "epoch": 0.39, "learning_rate": 1.949366573971451e-05, "loss": 0.1272, "step": 1645 }, { "epoch": 0.39, "learning_rate": 1.949286921366632e-05, "loss": 0.0562, "step": 1646 }, { "epoch": 0.39, "learning_rate": 1.949207207789298e-05, "loss": 0.0474, "step": 1647 }, { "epoch": 0.39, "learning_rate": 1.94912743324457e-05, "loss": 0.0616, "step": 1648 }, { "epoch": 0.39, "learning_rate": 1.9490475977375713e-05, "loss": 0.0208, "step": 1649 }, { "epoch": 0.39, "learning_rate": 1.9489677012734295e-05, "loss": 0.0614, "step": 1650 }, { "epoch": 0.39, "learning_rate": 1.948887743857277e-05, "loss": 0.0839, "step": 1651 }, { "epoch": 0.39, "learning_rate": 1.948807725494249e-05, "loss": 0.0071, "step": 1652 }, { "epoch": 0.39, "learning_rate": 1.9487276461894852e-05, "loss": 0.1825, "step": 1653 }, { "epoch": 0.39, "learning_rate": 1.9486475059481292e-05, "loss": 0.1068, "step": 1654 }, { "epoch": 0.39, "learning_rate": 1.9485673047753283e-05, "loss": 0.0498, "step": 1655 }, { "epoch": 0.39, "learning_rate": 1.9484870426762336e-05, "loss": 0.0323, "step": 1656 }, { "epoch": 0.39, "learning_rate": 1.9484067196560004e-05, "loss": 0.1368, "step": 1657 }, { "epoch": 0.39, "learning_rate": 1.9483263357197882e-05, "loss": 0.1248, "step": 1658 }, { "epoch": 0.39, "learning_rate": 1.94824589087276e-05, "loss": 0.0505, "step": 1659 }, { "epoch": 0.39, "learning_rate": 1.948165385120082e-05, "loss": 0.2165, "step": 1660 }, { "epoch": 0.39, "learning_rate": 1.9480848184669255e-05, "loss": 0.0515, "step": 1661 }, { "epoch": 0.39, "learning_rate": 1.9480041909184656e-05, "loss": 0.0466, "step": 1662 }, { "epoch": 0.39, "learning_rate": 1.9479235024798804e-05, "loss": 0.0995, "step": 1663 }, { "epoch": 0.39, "learning_rate": 1.9478427531563536e-05, "loss": 0.0305, "step": 1664 }, { "epoch": 0.39, "learning_rate": 1.9477619429530702e-05, "loss": 0.1201, "step": 1665 }, { "epoch": 0.39, "learning_rate": 1.947681071875222e-05, "loss": 0.1026, "step": 1666 }, { "epoch": 0.39, "learning_rate": 1.9476001399280026e-05, "loss": 0.1422, "step": 1667 }, { "epoch": 0.39, "learning_rate": 1.94751914711661e-05, "loss": 0.0526, "step": 1668 }, { "epoch": 0.39, "learning_rate": 1.9474380934462475e-05, "loss": 0.1048, "step": 1669 }, { "epoch": 0.39, "learning_rate": 1.9473569789221195e-05, "loss": 0.0867, "step": 1670 }, { "epoch": 0.39, "learning_rate": 1.9472758035494373e-05, "loss": 0.0605, "step": 1671 }, { "epoch": 0.39, "learning_rate": 1.9471945673334142e-05, "loss": 0.1278, "step": 1672 }, { "epoch": 0.39, "learning_rate": 1.9471132702792684e-05, "loss": 0.1055, "step": 1673 }, { "epoch": 0.39, "learning_rate": 1.947031912392221e-05, "loss": 0.0595, "step": 1674 }, { "epoch": 0.39, "learning_rate": 1.946950493677498e-05, "loss": 0.1034, "step": 1675 }, { "epoch": 0.39, "learning_rate": 1.946869014140329e-05, "loss": 0.0632, "step": 1676 }, { "epoch": 0.39, "learning_rate": 1.946787473785947e-05, "loss": 0.0837, "step": 1677 }, { "epoch": 0.39, "learning_rate": 1.94670587261959e-05, "loss": 0.0774, "step": 1678 }, { "epoch": 0.39, "learning_rate": 1.9466242106464982e-05, "loss": 0.0971, "step": 1679 }, { "epoch": 0.39, "learning_rate": 1.9465424878719177e-05, "loss": 0.0164, "step": 1680 }, { "epoch": 0.39, "learning_rate": 1.9464607043010974e-05, "loss": 0.0497, "step": 1681 }, { "epoch": 0.39, "learning_rate": 1.9463788599392896e-05, "loss": 0.1273, "step": 1682 }, { "epoch": 0.39, "learning_rate": 1.9462969547917514e-05, "loss": 0.0993, "step": 1683 }, { "epoch": 0.4, "learning_rate": 1.946214988863744e-05, "loss": 0.0875, "step": 1684 }, { "epoch": 0.4, "learning_rate": 1.9461329621605315e-05, "loss": 0.0587, "step": 1685 }, { "epoch": 0.4, "learning_rate": 1.946050874687383e-05, "loss": 0.0531, "step": 1686 }, { "epoch": 0.4, "learning_rate": 1.9459687264495705e-05, "loss": 0.0929, "step": 1687 }, { "epoch": 0.4, "learning_rate": 1.9458865174523707e-05, "loss": 0.09, "step": 1688 }, { "epoch": 0.4, "learning_rate": 1.9458042477010637e-05, "loss": 0.0557, "step": 1689 }, { "epoch": 0.4, "learning_rate": 1.9457219172009335e-05, "loss": 0.0573, "step": 1690 }, { "epoch": 0.4, "learning_rate": 1.9456395259572686e-05, "loss": 0.039, "step": 1691 }, { "epoch": 0.4, "learning_rate": 1.9455570739753607e-05, "loss": 0.0794, "step": 1692 }, { "epoch": 0.4, "learning_rate": 1.945474561260505e-05, "loss": 0.0912, "step": 1693 }, { "epoch": 0.4, "learning_rate": 1.945391987818003e-05, "loss": 0.0991, "step": 1694 }, { "epoch": 0.4, "learning_rate": 1.945309353653157e-05, "loss": 0.1424, "step": 1695 }, { "epoch": 0.4, "learning_rate": 1.945226658771275e-05, "loss": 0.0511, "step": 1696 }, { "epoch": 0.4, "learning_rate": 1.9451439031776683e-05, "loss": 0.0439, "step": 1697 }, { "epoch": 0.4, "learning_rate": 1.9450610868776524e-05, "loss": 0.0856, "step": 1698 }, { "epoch": 0.4, "learning_rate": 1.9449782098765463e-05, "loss": 0.0304, "step": 1699 }, { "epoch": 0.4, "learning_rate": 1.9448952721796736e-05, "loss": 0.1267, "step": 1700 }, { "epoch": 0.4, "learning_rate": 1.9448122737923612e-05, "loss": 0.0391, "step": 1701 }, { "epoch": 0.4, "learning_rate": 1.9447292147199402e-05, "loss": 0.0761, "step": 1702 }, { "epoch": 0.4, "learning_rate": 1.9446460949677455e-05, "loss": 0.0516, "step": 1703 }, { "epoch": 0.4, "learning_rate": 1.9445629145411154e-05, "loss": 0.1208, "step": 1704 }, { "epoch": 0.4, "learning_rate": 1.9444796734453927e-05, "loss": 0.1144, "step": 1705 }, { "epoch": 0.4, "learning_rate": 1.9443963716859248e-05, "loss": 0.0777, "step": 1706 }, { "epoch": 0.4, "learning_rate": 1.944313009268061e-05, "loss": 0.0357, "step": 1707 }, { "epoch": 0.4, "learning_rate": 1.9442295861971564e-05, "loss": 0.0247, "step": 1708 }, { "epoch": 0.4, "learning_rate": 1.9441461024785685e-05, "loss": 0.0429, "step": 1709 }, { "epoch": 0.4, "learning_rate": 1.9440625581176603e-05, "loss": 0.0359, "step": 1710 }, { "epoch": 0.4, "learning_rate": 1.9439789531197976e-05, "loss": 0.1284, "step": 1711 }, { "epoch": 0.4, "learning_rate": 1.94389528749035e-05, "loss": 0.0521, "step": 1712 }, { "epoch": 0.4, "learning_rate": 1.943811561234691e-05, "loss": 0.0953, "step": 1713 }, { "epoch": 0.4, "learning_rate": 1.9437277743581996e-05, "loss": 0.1775, "step": 1714 }, { "epoch": 0.4, "learning_rate": 1.9436439268662567e-05, "loss": 0.0437, "step": 1715 }, { "epoch": 0.4, "learning_rate": 1.9435600187642474e-05, "loss": 0.1315, "step": 1716 }, { "epoch": 0.4, "learning_rate": 1.9434760500575614e-05, "loss": 0.1094, "step": 1717 }, { "epoch": 0.4, "learning_rate": 1.9433920207515925e-05, "loss": 0.0952, "step": 1718 }, { "epoch": 0.4, "learning_rate": 1.943307930851737e-05, "loss": 0.06, "step": 1719 }, { "epoch": 0.4, "learning_rate": 1.943223780363397e-05, "loss": 0.1193, "step": 1720 }, { "epoch": 0.4, "learning_rate": 1.9431395692919763e-05, "loss": 0.0694, "step": 1721 }, { "epoch": 0.4, "learning_rate": 1.9430552976428843e-05, "loss": 0.1873, "step": 1722 }, { "epoch": 0.4, "learning_rate": 1.9429709654215342e-05, "loss": 0.1228, "step": 1723 }, { "epoch": 0.4, "learning_rate": 1.9428865726333423e-05, "loss": 0.1606, "step": 1724 }, { "epoch": 0.4, "learning_rate": 1.942802119283729e-05, "loss": 0.0944, "step": 1725 }, { "epoch": 0.41, "learning_rate": 1.942717605378119e-05, "loss": 0.0943, "step": 1726 }, { "epoch": 0.41, "learning_rate": 1.94263303092194e-05, "loss": 0.128, "step": 1727 }, { "epoch": 0.41, "learning_rate": 1.9425483959206248e-05, "loss": 0.0962, "step": 1728 }, { "epoch": 0.41, "learning_rate": 1.942463700379609e-05, "loss": 0.1111, "step": 1729 }, { "epoch": 0.41, "learning_rate": 1.9423789443043333e-05, "loss": 0.0864, "step": 1730 }, { "epoch": 0.41, "learning_rate": 1.9422941277002412e-05, "loss": 0.1126, "step": 1731 }, { "epoch": 0.41, "learning_rate": 1.94220925057278e-05, "loss": 0.0996, "step": 1732 }, { "epoch": 0.41, "learning_rate": 1.942124312927402e-05, "loss": 0.0635, "step": 1733 }, { "epoch": 0.41, "learning_rate": 1.9420393147695626e-05, "loss": 0.1138, "step": 1734 }, { "epoch": 0.41, "learning_rate": 1.9419542561047212e-05, "loss": 0.0356, "step": 1735 }, { "epoch": 0.41, "learning_rate": 1.9418691369383407e-05, "loss": 0.0937, "step": 1736 }, { "epoch": 0.41, "learning_rate": 1.9417839572758884e-05, "loss": 0.0878, "step": 1737 }, { "epoch": 0.41, "learning_rate": 1.9416987171228363e-05, "loss": 0.128, "step": 1738 }, { "epoch": 0.41, "learning_rate": 1.941613416484658e-05, "loss": 0.0724, "step": 1739 }, { "epoch": 0.41, "learning_rate": 1.941528055366833e-05, "loss": 0.0675, "step": 1740 }, { "epoch": 0.41, "learning_rate": 1.941442633774844e-05, "loss": 0.0412, "step": 1741 }, { "epoch": 0.41, "learning_rate": 1.9413571517141777e-05, "loss": 0.0382, "step": 1742 }, { "epoch": 0.41, "learning_rate": 1.9412716091903246e-05, "loss": 0.0557, "step": 1743 }, { "epoch": 0.41, "learning_rate": 1.9411860062087786e-05, "loss": 0.0388, "step": 1744 }, { "epoch": 0.41, "learning_rate": 1.9411003427750387e-05, "loss": 0.0526, "step": 1745 }, { "epoch": 0.41, "learning_rate": 1.9410146188946066e-05, "loss": 0.0785, "step": 1746 }, { "epoch": 0.41, "learning_rate": 1.9409288345729882e-05, "loss": 0.0958, "step": 1747 }, { "epoch": 0.41, "learning_rate": 1.940842989815694e-05, "loss": 0.0488, "step": 1748 }, { "epoch": 0.41, "learning_rate": 1.940757084628237e-05, "loss": 0.0334, "step": 1749 }, { "epoch": 0.41, "learning_rate": 1.9406711190161358e-05, "loss": 0.0658, "step": 1750 }, { "epoch": 0.41, "learning_rate": 1.940585092984911e-05, "loss": 0.0148, "step": 1751 }, { "epoch": 0.41, "learning_rate": 1.9404990065400885e-05, "loss": 0.0356, "step": 1752 }, { "epoch": 0.41, "learning_rate": 1.9404128596871977e-05, "loss": 0.016, "step": 1753 }, { "epoch": 0.41, "learning_rate": 1.940326652431772e-05, "loss": 0.0865, "step": 1754 }, { "epoch": 0.41, "learning_rate": 1.940240384779348e-05, "loss": 0.0514, "step": 1755 }, { "epoch": 0.41, "learning_rate": 1.9401540567354665e-05, "loss": 0.0583, "step": 1756 }, { "epoch": 0.41, "learning_rate": 1.9400676683056726e-05, "loss": 0.025, "step": 1757 }, { "epoch": 0.41, "learning_rate": 1.9399812194955156e-05, "loss": 0.0508, "step": 1758 }, { "epoch": 0.41, "learning_rate": 1.939894710310547e-05, "loss": 0.0867, "step": 1759 }, { "epoch": 0.41, "learning_rate": 1.939808140756324e-05, "loss": 0.0375, "step": 1760 }, { "epoch": 0.41, "learning_rate": 1.9397215108384068e-05, "loss": 0.1047, "step": 1761 }, { "epoch": 0.41, "learning_rate": 1.9396348205623598e-05, "loss": 0.0385, "step": 1762 }, { "epoch": 0.41, "learning_rate": 1.939548069933751e-05, "loss": 0.057, "step": 1763 }, { "epoch": 0.41, "learning_rate": 1.9394612589581517e-05, "loss": 0.1539, "step": 1764 }, { "epoch": 0.41, "learning_rate": 1.9393743876411385e-05, "loss": 0.0421, "step": 1765 }, { "epoch": 0.41, "learning_rate": 1.939287455988291e-05, "loss": 0.0796, "step": 1766 }, { "epoch": 0.41, "learning_rate": 1.939200464005193e-05, "loss": 0.0499, "step": 1767 }, { "epoch": 0.41, "learning_rate": 1.9391134116974313e-05, "loss": 0.0596, "step": 1768 }, { "epoch": 0.42, "learning_rate": 1.939026299070598e-05, "loss": 0.0506, "step": 1769 }, { "epoch": 0.42, "learning_rate": 1.9389391261302878e-05, "loss": 0.0521, "step": 1770 }, { "epoch": 0.42, "learning_rate": 1.9388518928821004e-05, "loss": 0.1344, "step": 1771 }, { "epoch": 0.42, "learning_rate": 1.938764599331638e-05, "loss": 0.0458, "step": 1772 }, { "epoch": 0.42, "learning_rate": 1.9386772454845077e-05, "loss": 0.0907, "step": 1773 }, { "epoch": 0.42, "learning_rate": 1.938589831346321e-05, "loss": 0.1418, "step": 1774 }, { "epoch": 0.42, "learning_rate": 1.9385023569226914e-05, "loss": 0.1193, "step": 1775 }, { "epoch": 0.42, "learning_rate": 1.9384148222192376e-05, "loss": 0.0775, "step": 1776 }, { "epoch": 0.42, "learning_rate": 1.9383272272415823e-05, "loss": 0.0454, "step": 1777 }, { "epoch": 0.42, "learning_rate": 1.9382395719953518e-05, "loss": 0.1384, "step": 1778 }, { "epoch": 0.42, "learning_rate": 1.938151856486176e-05, "loss": 0.0574, "step": 1779 }, { "epoch": 0.42, "learning_rate": 1.938064080719688e-05, "loss": 0.0699, "step": 1780 }, { "epoch": 0.42, "learning_rate": 1.937976244701527e-05, "loss": 0.0661, "step": 1781 }, { "epoch": 0.42, "learning_rate": 1.9378883484373343e-05, "loss": 0.1224, "step": 1782 }, { "epoch": 0.42, "learning_rate": 1.937800391932755e-05, "loss": 0.049, "step": 1783 }, { "epoch": 0.42, "learning_rate": 1.9377123751934387e-05, "loss": 0.0904, "step": 1784 }, { "epoch": 0.42, "learning_rate": 1.937624298225039e-05, "loss": 0.1177, "step": 1785 }, { "epoch": 0.42, "learning_rate": 1.937536161033213e-05, "loss": 0.0791, "step": 1786 }, { "epoch": 0.42, "learning_rate": 1.937447963623621e-05, "loss": 0.0577, "step": 1787 }, { "epoch": 0.42, "learning_rate": 1.937359706001929e-05, "loss": 0.0608, "step": 1788 }, { "epoch": 0.42, "learning_rate": 1.937271388173805e-05, "loss": 0.0443, "step": 1789 }, { "epoch": 0.42, "learning_rate": 1.937183010144922e-05, "loss": 0.033, "step": 1790 }, { "epoch": 0.42, "learning_rate": 1.937094571920956e-05, "loss": 0.0486, "step": 1791 }, { "epoch": 0.42, "learning_rate": 1.9370060735075883e-05, "loss": 0.0664, "step": 1792 }, { "epoch": 0.42, "learning_rate": 1.9369175149105025e-05, "loss": 0.1222, "step": 1793 }, { "epoch": 0.42, "learning_rate": 1.9368288961353866e-05, "loss": 0.1026, "step": 1794 }, { "epoch": 0.42, "learning_rate": 1.936740217187933e-05, "loss": 0.1115, "step": 1795 }, { "epoch": 0.42, "learning_rate": 1.936651478073837e-05, "loss": 0.0704, "step": 1796 }, { "epoch": 0.42, "learning_rate": 1.936562678798799e-05, "loss": 0.0941, "step": 1797 }, { "epoch": 0.42, "learning_rate": 1.936473819368522e-05, "loss": 0.0825, "step": 1798 }, { "epoch": 0.42, "learning_rate": 1.9363848997887134e-05, "loss": 0.0683, "step": 1799 }, { "epoch": 0.42, "learning_rate": 1.9362959200650845e-05, "loss": 0.0282, "step": 1800 }, { "epoch": 0.42, "learning_rate": 1.936206880203351e-05, "loss": 0.073, "step": 1801 }, { "epoch": 0.42, "learning_rate": 1.9361177802092314e-05, "loss": 0.067, "step": 1802 }, { "epoch": 0.42, "learning_rate": 1.9360286200884492e-05, "loss": 0.0511, "step": 1803 }, { "epoch": 0.42, "learning_rate": 1.93593939984673e-05, "loss": 0.1261, "step": 1804 }, { "epoch": 0.42, "learning_rate": 1.9358501194898053e-05, "loss": 0.0519, "step": 1805 }, { "epoch": 0.42, "learning_rate": 1.935760779023409e-05, "loss": 0.0846, "step": 1806 }, { "epoch": 0.42, "learning_rate": 1.93567137845328e-05, "loss": 0.0268, "step": 1807 }, { "epoch": 0.42, "learning_rate": 1.9355819177851603e-05, "loss": 0.1296, "step": 1808 }, { "epoch": 0.42, "learning_rate": 1.9354923970247957e-05, "loss": 0.0657, "step": 1809 }, { "epoch": 0.42, "learning_rate": 1.9354028161779364e-05, "loss": 0.0457, "step": 1810 }, { "epoch": 0.43, "learning_rate": 1.9353131752503356e-05, "loss": 0.103, "step": 1811 }, { "epoch": 0.43, "learning_rate": 1.9352234742477515e-05, "loss": 0.0761, "step": 1812 }, { "epoch": 0.43, "learning_rate": 1.9351337131759458e-05, "loss": 0.1102, "step": 1813 }, { "epoch": 0.43, "learning_rate": 1.935043892040683e-05, "loss": 0.0465, "step": 1814 }, { "epoch": 0.43, "learning_rate": 1.934954010847733e-05, "loss": 0.0639, "step": 1815 }, { "epoch": 0.43, "learning_rate": 1.9348640696028686e-05, "loss": 0.0504, "step": 1816 }, { "epoch": 0.43, "learning_rate": 1.9347740683118666e-05, "loss": 0.0455, "step": 1817 }, { "epoch": 0.43, "learning_rate": 1.9346840069805078e-05, "loss": 0.0506, "step": 1818 }, { "epoch": 0.43, "learning_rate": 1.934593885614577e-05, "loss": 0.036, "step": 1819 }, { "epoch": 0.43, "learning_rate": 1.9345037042198626e-05, "loss": 0.0672, "step": 1820 }, { "epoch": 0.43, "learning_rate": 1.9344134628021564e-05, "loss": 0.0957, "step": 1821 }, { "epoch": 0.43, "learning_rate": 1.9343231613672558e-05, "loss": 0.0892, "step": 1822 }, { "epoch": 0.43, "learning_rate": 1.9342327999209596e-05, "loss": 0.0398, "step": 1823 }, { "epoch": 0.43, "learning_rate": 1.9341423784690723e-05, "loss": 0.1638, "step": 1824 }, { "epoch": 0.43, "learning_rate": 1.934051897017402e-05, "loss": 0.1595, "step": 1825 }, { "epoch": 0.43, "learning_rate": 1.9339613555717594e-05, "loss": 0.0811, "step": 1826 }, { "epoch": 0.43, "learning_rate": 1.9338707541379608e-05, "loss": 0.0968, "step": 1827 }, { "epoch": 0.43, "learning_rate": 1.933780092721825e-05, "loss": 0.0912, "step": 1828 }, { "epoch": 0.43, "learning_rate": 1.9336893713291753e-05, "loss": 0.1476, "step": 1829 }, { "epoch": 0.43, "learning_rate": 1.9335985899658384e-05, "loss": 0.1931, "step": 1830 }, { "epoch": 0.43, "learning_rate": 1.933507748637646e-05, "loss": 0.0678, "step": 1831 }, { "epoch": 0.43, "learning_rate": 1.933416847350432e-05, "loss": 0.0856, "step": 1832 }, { "epoch": 0.43, "learning_rate": 1.9333258861100357e-05, "loss": 0.0751, "step": 1833 }, { "epoch": 0.43, "learning_rate": 1.933234864922299e-05, "loss": 0.0411, "step": 1834 }, { "epoch": 0.43, "learning_rate": 1.933143783793068e-05, "loss": 0.0521, "step": 1835 }, { "epoch": 0.43, "learning_rate": 1.9330526427281933e-05, "loss": 0.0685, "step": 1836 }, { "epoch": 0.43, "learning_rate": 1.932961441733529e-05, "loss": 0.0579, "step": 1837 }, { "epoch": 0.43, "learning_rate": 1.932870180814932e-05, "loss": 0.0415, "step": 1838 }, { "epoch": 0.43, "learning_rate": 1.932778859978265e-05, "loss": 0.0684, "step": 1839 }, { "epoch": 0.43, "learning_rate": 1.9326874792293933e-05, "loss": 0.1896, "step": 1840 }, { "epoch": 0.43, "learning_rate": 1.9325960385741855e-05, "loss": 0.0557, "step": 1841 }, { "epoch": 0.43, "learning_rate": 1.932504538018516e-05, "loss": 0.0884, "step": 1842 }, { "epoch": 0.43, "learning_rate": 1.9324129775682608e-05, "loss": 0.1196, "step": 1843 }, { "epoch": 0.43, "learning_rate": 1.9323213572293014e-05, "loss": 0.11, "step": 1844 }, { "epoch": 0.43, "learning_rate": 1.9322296770075225e-05, "loss": 0.1029, "step": 1845 }, { "epoch": 0.43, "learning_rate": 1.9321379369088126e-05, "loss": 0.1051, "step": 1846 }, { "epoch": 0.43, "learning_rate": 1.932046136939064e-05, "loss": 0.1381, "step": 1847 }, { "epoch": 0.43, "learning_rate": 1.9319542771041734e-05, "loss": 0.1419, "step": 1848 }, { "epoch": 0.43, "learning_rate": 1.9318623574100403e-05, "loss": 0.1649, "step": 1849 }, { "epoch": 0.43, "learning_rate": 1.9317703778625697e-05, "loss": 0.1923, "step": 1850 }, { "epoch": 0.43, "learning_rate": 1.9316783384676684e-05, "loss": 0.0846, "step": 1851 }, { "epoch": 0.43, "learning_rate": 1.9315862392312487e-05, "loss": 0.0503, "step": 1852 }, { "epoch": 0.43, "learning_rate": 1.9314940801592255e-05, "loss": 0.0492, "step": 1853 }, { "epoch": 0.44, "learning_rate": 1.931401861257519e-05, "loss": 0.0793, "step": 1854 }, { "epoch": 0.44, "learning_rate": 1.9313095825320517e-05, "loss": 0.0537, "step": 1855 }, { "epoch": 0.44, "learning_rate": 1.9312172439887508e-05, "loss": 0.1678, "step": 1856 }, { "epoch": 0.44, "learning_rate": 1.9311248456335477e-05, "loss": 0.0143, "step": 1857 }, { "epoch": 0.44, "learning_rate": 1.9310323874723766e-05, "loss": 0.0312, "step": 1858 }, { "epoch": 0.44, "learning_rate": 1.930939869511176e-05, "loss": 0.0729, "step": 1859 }, { "epoch": 0.44, "learning_rate": 1.9308472917558886e-05, "loss": 0.0425, "step": 1860 }, { "epoch": 0.44, "learning_rate": 1.9307546542124607e-05, "loss": 0.0634, "step": 1861 }, { "epoch": 0.44, "learning_rate": 1.930661956886842e-05, "loss": 0.0549, "step": 1862 }, { "epoch": 0.44, "learning_rate": 1.930569199784987e-05, "loss": 0.0329, "step": 1863 }, { "epoch": 0.44, "learning_rate": 1.9304763829128528e-05, "loss": 0.1102, "step": 1864 }, { "epoch": 0.44, "learning_rate": 1.930383506276402e-05, "loss": 0.1066, "step": 1865 }, { "epoch": 0.44, "learning_rate": 1.9302905698815986e-05, "loss": 0.084, "step": 1866 }, { "epoch": 0.44, "learning_rate": 1.9301975737344133e-05, "loss": 0.061, "step": 1867 }, { "epoch": 0.44, "learning_rate": 1.930104517840818e-05, "loss": 0.0228, "step": 1868 }, { "epoch": 0.44, "learning_rate": 1.9300114022067905e-05, "loss": 0.0577, "step": 1869 }, { "epoch": 0.44, "learning_rate": 1.9299182268383115e-05, "loss": 0.1397, "step": 1870 }, { "epoch": 0.44, "learning_rate": 1.929824991741366e-05, "loss": 0.0621, "step": 1871 }, { "epoch": 0.44, "learning_rate": 1.9297316969219414e-05, "loss": 0.0364, "step": 1872 }, { "epoch": 0.44, "learning_rate": 1.9296383423860305e-05, "loss": 0.0379, "step": 1873 }, { "epoch": 0.44, "learning_rate": 1.9295449281396297e-05, "loss": 0.1096, "step": 1874 }, { "epoch": 0.44, "learning_rate": 1.929451454188739e-05, "loss": 0.0607, "step": 1875 }, { "epoch": 0.44, "learning_rate": 1.929357920539362e-05, "loss": 0.096, "step": 1876 }, { "epoch": 0.44, "learning_rate": 1.9292643271975066e-05, "loss": 0.1064, "step": 1877 }, { "epoch": 0.44, "learning_rate": 1.9291706741691834e-05, "loss": 0.0804, "step": 1878 }, { "epoch": 0.44, "learning_rate": 1.9290769614604088e-05, "loss": 0.153, "step": 1879 }, { "epoch": 0.44, "learning_rate": 1.9289831890772015e-05, "loss": 0.1193, "step": 1880 }, { "epoch": 0.44, "learning_rate": 1.9288893570255843e-05, "loss": 0.1553, "step": 1881 }, { "epoch": 0.44, "learning_rate": 1.9287954653115847e-05, "loss": 0.1559, "step": 1882 }, { "epoch": 0.44, "learning_rate": 1.9287015139412324e-05, "loss": 0.148, "step": 1883 }, { "epoch": 0.44, "learning_rate": 1.9286075029205628e-05, "loss": 0.15, "step": 1884 }, { "epoch": 0.44, "learning_rate": 1.9285134322556132e-05, "loss": 0.1473, "step": 1885 }, { "epoch": 0.44, "learning_rate": 1.9284193019524267e-05, "loss": 0.1086, "step": 1886 }, { "epoch": 0.44, "learning_rate": 1.9283251120170487e-05, "loss": 0.0846, "step": 1887 }, { "epoch": 0.44, "learning_rate": 1.928230862455529e-05, "loss": 0.0919, "step": 1888 }, { "epoch": 0.44, "learning_rate": 1.9281365532739216e-05, "loss": 0.0586, "step": 1889 }, { "epoch": 0.44, "learning_rate": 1.9280421844782838e-05, "loss": 0.0443, "step": 1890 }, { "epoch": 0.44, "learning_rate": 1.927947756074677e-05, "loss": 0.0315, "step": 1891 }, { "epoch": 0.44, "learning_rate": 1.927853268069166e-05, "loss": 0.0479, "step": 1892 }, { "epoch": 0.44, "learning_rate": 1.92775872046782e-05, "loss": 0.0661, "step": 1893 }, { "epoch": 0.44, "learning_rate": 1.9276641132767115e-05, "loss": 0.0404, "step": 1894 }, { "epoch": 0.44, "learning_rate": 1.9275694465019173e-05, "loss": 0.0446, "step": 1895 }, { "epoch": 0.44, "learning_rate": 1.927474720149518e-05, "loss": 0.1073, "step": 1896 }, { "epoch": 0.45, "learning_rate": 1.9273799342255977e-05, "loss": 0.1978, "step": 1897 }, { "epoch": 0.45, "learning_rate": 1.9272850887362445e-05, "loss": 0.0598, "step": 1898 }, { "epoch": 0.45, "learning_rate": 1.92719018368755e-05, "loss": 0.0624, "step": 1899 }, { "epoch": 0.45, "learning_rate": 1.9270952190856104e-05, "loss": 0.0904, "step": 1900 }, { "epoch": 0.45, "learning_rate": 1.927000194936525e-05, "loss": 0.0842, "step": 1901 }, { "epoch": 0.45, "learning_rate": 1.9269051112463974e-05, "loss": 0.0521, "step": 1902 }, { "epoch": 0.45, "learning_rate": 1.9268099680213342e-05, "loss": 0.0638, "step": 1903 }, { "epoch": 0.45, "learning_rate": 1.926714765267447e-05, "loss": 0.0625, "step": 1904 }, { "epoch": 0.45, "learning_rate": 1.926619502990851e-05, "loss": 0.0723, "step": 1905 }, { "epoch": 0.45, "learning_rate": 1.9265241811976637e-05, "loss": 0.036, "step": 1906 }, { "epoch": 0.45, "learning_rate": 1.926428799894009e-05, "loss": 0.0596, "step": 1907 }, { "epoch": 0.45, "learning_rate": 1.9263333590860124e-05, "loss": 0.0755, "step": 1908 }, { "epoch": 0.45, "learning_rate": 1.926237858779804e-05, "loss": 0.0254, "step": 1909 }, { "epoch": 0.45, "learning_rate": 1.926142298981518e-05, "loss": 0.0783, "step": 1910 }, { "epoch": 0.45, "learning_rate": 1.9260466796972917e-05, "loss": 0.0723, "step": 1911 }, { "epoch": 0.45, "learning_rate": 1.9259510009332675e-05, "loss": 0.146, "step": 1912 }, { "epoch": 0.45, "learning_rate": 1.9258552626955906e-05, "loss": 0.107, "step": 1913 }, { "epoch": 0.45, "learning_rate": 1.9257594649904098e-05, "loss": 0.0132, "step": 1914 }, { "epoch": 0.45, "learning_rate": 1.9256636078238786e-05, "loss": 0.1424, "step": 1915 }, { "epoch": 0.45, "learning_rate": 1.925567691202154e-05, "loss": 0.0931, "step": 1916 }, { "epoch": 0.45, "learning_rate": 1.9254717151313963e-05, "loss": 0.0234, "step": 1917 }, { "epoch": 0.45, "learning_rate": 1.92537567961777e-05, "loss": 0.0419, "step": 1918 }, { "epoch": 0.45, "learning_rate": 1.925279584667444e-05, "loss": 0.1084, "step": 1919 }, { "epoch": 0.45, "learning_rate": 1.92518343028659e-05, "loss": 0.0209, "step": 1920 }, { "epoch": 0.45, "learning_rate": 1.925087216481384e-05, "loss": 0.1119, "step": 1921 }, { "epoch": 0.45, "learning_rate": 1.924990943258006e-05, "loss": 0.0269, "step": 1922 }, { "epoch": 0.45, "learning_rate": 1.9248946106226392e-05, "loss": 0.0868, "step": 1923 }, { "epoch": 0.45, "learning_rate": 1.924798218581472e-05, "loss": 0.2159, "step": 1924 }, { "epoch": 0.45, "learning_rate": 1.924701767140694e-05, "loss": 0.1962, "step": 1925 }, { "epoch": 0.45, "learning_rate": 1.9246052563065017e-05, "loss": 0.0432, "step": 1926 }, { "epoch": 0.45, "learning_rate": 1.9245086860850936e-05, "loss": 0.0817, "step": 1927 }, { "epoch": 0.45, "learning_rate": 1.9244120564826718e-05, "loss": 0.18, "step": 1928 }, { "epoch": 0.45, "learning_rate": 1.9243153675054438e-05, "loss": 0.0823, "step": 1929 }, { "epoch": 0.45, "learning_rate": 1.924218619159619e-05, "loss": 0.0393, "step": 1930 }, { "epoch": 0.45, "learning_rate": 1.9241218114514122e-05, "loss": 0.0705, "step": 1931 }, { "epoch": 0.45, "learning_rate": 1.9240249443870408e-05, "loss": 0.0971, "step": 1932 }, { "epoch": 0.45, "learning_rate": 1.9239280179727272e-05, "loss": 0.0847, "step": 1933 }, { "epoch": 0.45, "learning_rate": 1.923831032214696e-05, "loss": 0.0305, "step": 1934 }, { "epoch": 0.45, "learning_rate": 1.9237339871191777e-05, "loss": 0.0847, "step": 1935 }, { "epoch": 0.45, "learning_rate": 1.9236368826924043e-05, "loss": 0.0776, "step": 1936 }, { "epoch": 0.45, "learning_rate": 1.923539718940614e-05, "loss": 0.0905, "step": 1937 }, { "epoch": 0.45, "learning_rate": 1.9234424958700468e-05, "loss": 0.039, "step": 1938 }, { "epoch": 0.46, "learning_rate": 1.923345213486948e-05, "loss": 0.1828, "step": 1939 }, { "epoch": 0.46, "learning_rate": 1.923247871797565e-05, "loss": 0.0593, "step": 1940 }, { "epoch": 0.46, "learning_rate": 1.9231504708081507e-05, "loss": 0.0845, "step": 1941 }, { "epoch": 0.46, "learning_rate": 1.923053010524961e-05, "loss": 0.061, "step": 1942 }, { "epoch": 0.46, "learning_rate": 1.922955490954256e-05, "loss": 0.0717, "step": 1943 }, { "epoch": 0.46, "learning_rate": 1.9228579121022994e-05, "loss": 0.0312, "step": 1944 }, { "epoch": 0.46, "learning_rate": 1.922760273975358e-05, "loss": 0.0624, "step": 1945 }, { "epoch": 0.46, "learning_rate": 1.9226625765797038e-05, "loss": 0.0386, "step": 1946 }, { "epoch": 0.46, "learning_rate": 1.9225648199216117e-05, "loss": 0.0695, "step": 1947 }, { "epoch": 0.46, "learning_rate": 1.9224670040073607e-05, "loss": 0.0712, "step": 1948 }, { "epoch": 0.46, "learning_rate": 1.9223691288432326e-05, "loss": 0.0765, "step": 1949 }, { "epoch": 0.46, "learning_rate": 1.9222711944355154e-05, "loss": 0.0459, "step": 1950 }, { "epoch": 0.46, "learning_rate": 1.922173200790498e-05, "loss": 0.0609, "step": 1951 }, { "epoch": 0.46, "learning_rate": 1.9220751479144756e-05, "loss": 0.0473, "step": 1952 }, { "epoch": 0.46, "learning_rate": 1.921977035813745e-05, "loss": 0.0613, "step": 1953 }, { "epoch": 0.46, "learning_rate": 1.9218788644946092e-05, "loss": 0.0627, "step": 1954 }, { "epoch": 0.46, "learning_rate": 1.921780633963373e-05, "loss": 0.0575, "step": 1955 }, { "epoch": 0.46, "learning_rate": 1.9216823442263456e-05, "loss": 0.0453, "step": 1956 }, { "epoch": 0.46, "learning_rate": 1.9215839952898405e-05, "loss": 0.0282, "step": 1957 }, { "epoch": 0.46, "learning_rate": 1.9214855871601746e-05, "loss": 0.0596, "step": 1958 }, { "epoch": 0.46, "learning_rate": 1.9213871198436685e-05, "loss": 0.0697, "step": 1959 }, { "epoch": 0.46, "learning_rate": 1.921288593346647e-05, "loss": 0.0331, "step": 1960 }, { "epoch": 0.46, "learning_rate": 1.9211900076754377e-05, "loss": 0.042, "step": 1961 }, { "epoch": 0.46, "learning_rate": 1.9210913628363733e-05, "loss": 0.0439, "step": 1962 }, { "epoch": 0.46, "learning_rate": 1.9209926588357896e-05, "loss": 0.0187, "step": 1963 }, { "epoch": 0.46, "learning_rate": 1.9208938956800268e-05, "loss": 0.1258, "step": 1964 }, { "epoch": 0.46, "learning_rate": 1.9207950733754277e-05, "loss": 0.1082, "step": 1965 }, { "epoch": 0.46, "learning_rate": 1.92069619192834e-05, "loss": 0.0537, "step": 1966 }, { "epoch": 0.46, "learning_rate": 1.920597251345115e-05, "loss": 0.1236, "step": 1967 }, { "epoch": 0.46, "learning_rate": 1.9204982516321077e-05, "loss": 0.0575, "step": 1968 }, { "epoch": 0.46, "learning_rate": 1.920399192795676e-05, "loss": 0.1228, "step": 1969 }, { "epoch": 0.46, "learning_rate": 1.9203000748421835e-05, "loss": 0.061, "step": 1970 }, { "epoch": 0.46, "learning_rate": 1.920200897777996e-05, "loss": 0.1438, "step": 1971 }, { "epoch": 0.46, "learning_rate": 1.9201016616094837e-05, "loss": 0.0234, "step": 1972 }, { "epoch": 0.46, "learning_rate": 1.9200023663430203e-05, "loss": 0.0825, "step": 1973 }, { "epoch": 0.46, "learning_rate": 1.9199030119849835e-05, "loss": 0.145, "step": 1974 }, { "epoch": 0.46, "learning_rate": 1.9198035985417552e-05, "loss": 0.0247, "step": 1975 }, { "epoch": 0.46, "learning_rate": 1.919704126019721e-05, "loss": 0.1368, "step": 1976 }, { "epoch": 0.46, "learning_rate": 1.919604594425269e-05, "loss": 0.1233, "step": 1977 }, { "epoch": 0.46, "learning_rate": 1.9195050037647926e-05, "loss": 0.0216, "step": 1978 }, { "epoch": 0.46, "learning_rate": 1.9194053540446886e-05, "loss": 0.0918, "step": 1979 }, { "epoch": 0.46, "learning_rate": 1.9193056452713578e-05, "loss": 0.102, "step": 1980 }, { "epoch": 0.46, "learning_rate": 1.9192058774512037e-05, "loss": 0.0962, "step": 1981 }, { "epoch": 0.47, "learning_rate": 1.9191060505906346e-05, "loss": 0.0594, "step": 1982 }, { "epoch": 0.47, "learning_rate": 1.9190061646960625e-05, "loss": 0.0421, "step": 1983 }, { "epoch": 0.47, "learning_rate": 1.9189062197739033e-05, "loss": 0.0566, "step": 1984 }, { "epoch": 0.47, "learning_rate": 1.918806215830576e-05, "loss": 0.0751, "step": 1985 }, { "epoch": 0.47, "learning_rate": 1.9187061528725045e-05, "loss": 0.0899, "step": 1986 }, { "epoch": 0.47, "learning_rate": 1.9186060309061146e-05, "loss": 0.0303, "step": 1987 }, { "epoch": 0.47, "learning_rate": 1.9185058499378386e-05, "loss": 0.0292, "step": 1988 }, { "epoch": 0.47, "learning_rate": 1.91840560997411e-05, "loss": 0.0905, "step": 1989 }, { "epoch": 0.47, "learning_rate": 1.9183053110213675e-05, "loss": 0.0625, "step": 1990 }, { "epoch": 0.47, "learning_rate": 1.9182049530860537e-05, "loss": 0.0453, "step": 1991 }, { "epoch": 0.47, "learning_rate": 1.9181045361746136e-05, "loss": 0.1157, "step": 1992 }, { "epoch": 0.47, "learning_rate": 1.918004060293498e-05, "loss": 0.0685, "step": 1993 }, { "epoch": 0.47, "learning_rate": 1.9179035254491597e-05, "loss": 0.0805, "step": 1994 }, { "epoch": 0.47, "learning_rate": 1.9178029316480567e-05, "loss": 0.0458, "step": 1995 }, { "epoch": 0.47, "learning_rate": 1.9177022788966497e-05, "loss": 0.0344, "step": 1996 }, { "epoch": 0.47, "learning_rate": 1.9176015672014038e-05, "loss": 0.0174, "step": 1997 }, { "epoch": 0.47, "learning_rate": 1.917500796568787e-05, "loss": 0.0725, "step": 1998 }, { "epoch": 0.47, "learning_rate": 1.9173999670052728e-05, "loss": 0.0906, "step": 1999 }, { "epoch": 0.47, "learning_rate": 1.9172990785173372e-05, "loss": 0.0593, "step": 2000 }, { "epoch": 0.47, "learning_rate": 1.9171981311114595e-05, "loss": 0.0304, "step": 2001 }, { "epoch": 0.47, "learning_rate": 1.9170971247941245e-05, "loss": 0.0755, "step": 2002 }, { "epoch": 0.47, "learning_rate": 1.9169960595718192e-05, "loss": 0.0207, "step": 2003 }, { "epoch": 0.47, "learning_rate": 1.9168949354510356e-05, "loss": 0.0485, "step": 2004 }, { "epoch": 0.47, "learning_rate": 1.916793752438268e-05, "loss": 0.0888, "step": 2005 }, { "epoch": 0.47, "learning_rate": 1.916692510540016e-05, "loss": 0.122, "step": 2006 }, { "epoch": 0.47, "learning_rate": 1.9165912097627823e-05, "loss": 0.0265, "step": 2007 }, { "epoch": 0.47, "learning_rate": 1.9164898501130734e-05, "loss": 0.0653, "step": 2008 }, { "epoch": 0.47, "learning_rate": 1.9163884315973993e-05, "loss": 0.0899, "step": 2009 }, { "epoch": 0.47, "learning_rate": 1.9162869542222745e-05, "loss": 0.0405, "step": 2010 }, { "epoch": 0.47, "learning_rate": 1.9161854179942166e-05, "loss": 0.0333, "step": 2011 }, { "epoch": 0.47, "learning_rate": 1.9160838229197475e-05, "loss": 0.0682, "step": 2012 }, { "epoch": 0.47, "learning_rate": 1.9159821690053927e-05, "loss": 0.0933, "step": 2013 }, { "epoch": 0.47, "learning_rate": 1.9158804562576808e-05, "loss": 0.0462, "step": 2014 }, { "epoch": 0.47, "learning_rate": 1.9157786846831455e-05, "loss": 0.0861, "step": 2015 }, { "epoch": 0.47, "learning_rate": 1.9156768542883228e-05, "loss": 0.0766, "step": 2016 }, { "epoch": 0.47, "learning_rate": 1.9155749650797542e-05, "loss": 0.0716, "step": 2017 }, { "epoch": 0.47, "learning_rate": 1.9154730170639832e-05, "loss": 0.068, "step": 2018 }, { "epoch": 0.47, "learning_rate": 1.9153710102475584e-05, "loss": 0.1335, "step": 2019 }, { "epoch": 0.47, "learning_rate": 1.9152689446370317e-05, "loss": 0.0461, "step": 2020 }, { "epoch": 0.47, "learning_rate": 1.9151668202389582e-05, "loss": 0.1046, "step": 2021 }, { "epoch": 0.47, "learning_rate": 1.915064637059898e-05, "loss": 0.0522, "step": 2022 }, { "epoch": 0.47, "learning_rate": 1.9149623951064135e-05, "loss": 0.085, "step": 2023 }, { "epoch": 0.48, "learning_rate": 1.914860094385073e-05, "loss": 0.0862, "step": 2024 }, { "epoch": 0.48, "learning_rate": 1.9147577349024456e-05, "loss": 0.0599, "step": 2025 }, { "epoch": 0.48, "learning_rate": 1.9146553166651068e-05, "loss": 0.0469, "step": 2026 }, { "epoch": 0.48, "learning_rate": 1.9145528396796353e-05, "loss": 0.0787, "step": 2027 }, { "epoch": 0.48, "learning_rate": 1.9144503039526123e-05, "loss": 0.1077, "step": 2028 }, { "epoch": 0.48, "learning_rate": 1.914347709490624e-05, "loss": 0.0923, "step": 2029 }, { "epoch": 0.48, "learning_rate": 1.91424505630026e-05, "loss": 0.0951, "step": 2030 }, { "epoch": 0.48, "learning_rate": 1.914142344388114e-05, "loss": 0.1145, "step": 2031 }, { "epoch": 0.48, "learning_rate": 1.9140395737607827e-05, "loss": 0.0471, "step": 2032 }, { "epoch": 0.48, "learning_rate": 1.913936744424867e-05, "loss": 0.0611, "step": 2033 }, { "epoch": 0.48, "learning_rate": 1.913833856386972e-05, "loss": 0.0559, "step": 2034 }, { "epoch": 0.48, "learning_rate": 1.913730909653706e-05, "loss": 0.0634, "step": 2035 }, { "epoch": 0.48, "learning_rate": 1.913627904231681e-05, "loss": 0.0377, "step": 2036 }, { "epoch": 0.48, "learning_rate": 1.9135248401275137e-05, "loss": 0.072, "step": 2037 }, { "epoch": 0.48, "learning_rate": 1.9134217173478237e-05, "loss": 0.0279, "step": 2038 }, { "epoch": 0.48, "learning_rate": 1.9133185358992337e-05, "loss": 0.1153, "step": 2039 }, { "epoch": 0.48, "learning_rate": 1.9132152957883717e-05, "loss": 0.0809, "step": 2040 }, { "epoch": 0.48, "learning_rate": 1.913111997021869e-05, "loss": 0.0953, "step": 2041 }, { "epoch": 0.48, "learning_rate": 1.91300863960636e-05, "loss": 0.0566, "step": 2042 }, { "epoch": 0.48, "learning_rate": 1.9129052235484837e-05, "loss": 0.0382, "step": 2043 }, { "epoch": 0.48, "learning_rate": 1.912801748854882e-05, "loss": 0.0559, "step": 2044 }, { "epoch": 0.48, "learning_rate": 1.912698215532202e-05, "loss": 0.073, "step": 2045 }, { "epoch": 0.48, "learning_rate": 1.9125946235870922e-05, "loss": 0.1233, "step": 2046 }, { "epoch": 0.48, "learning_rate": 1.9124909730262075e-05, "loss": 0.1329, "step": 2047 }, { "epoch": 0.48, "learning_rate": 1.912387263856205e-05, "loss": 0.1118, "step": 2048 }, { "epoch": 0.48, "learning_rate": 1.9122834960837457e-05, "loss": 0.0815, "step": 2049 }, { "epoch": 0.48, "learning_rate": 1.912179669715495e-05, "loss": 0.1054, "step": 2050 }, { "epoch": 0.48, "learning_rate": 1.9120757847581212e-05, "loss": 0.0609, "step": 2051 }, { "epoch": 0.48, "learning_rate": 1.9119718412182968e-05, "loss": 0.0632, "step": 2052 }, { "epoch": 0.48, "learning_rate": 1.9118678391026984e-05, "loss": 0.1224, "step": 2053 }, { "epoch": 0.48, "learning_rate": 1.911763778418006e-05, "loss": 0.1243, "step": 2054 }, { "epoch": 0.48, "learning_rate": 1.9116596591709033e-05, "loss": 0.064, "step": 2055 }, { "epoch": 0.48, "learning_rate": 1.911555481368078e-05, "loss": 0.1033, "step": 2056 }, { "epoch": 0.48, "learning_rate": 1.9114512450162207e-05, "loss": 0.1468, "step": 2057 }, { "epoch": 0.48, "learning_rate": 1.911346950122028e-05, "loss": 0.1031, "step": 2058 }, { "epoch": 0.48, "learning_rate": 1.9112425966921972e-05, "loss": 0.131, "step": 2059 }, { "epoch": 0.48, "learning_rate": 1.9111381847334318e-05, "loss": 0.1497, "step": 2060 }, { "epoch": 0.48, "learning_rate": 1.9110337142524376e-05, "loss": 0.3023, "step": 2061 }, { "epoch": 0.48, "learning_rate": 1.910929185255925e-05, "loss": 0.3452, "step": 2062 }, { "epoch": 0.48, "learning_rate": 1.9108245977506078e-05, "loss": 0.2658, "step": 2063 }, { "epoch": 0.48, "learning_rate": 1.910719951743204e-05, "loss": 0.1585, "step": 2064 }, { "epoch": 0.48, "learning_rate": 1.9106152472404344e-05, "loss": 0.0894, "step": 2065 }, { "epoch": 0.48, "learning_rate": 1.9105104842490247e-05, "loss": 0.0685, "step": 2066 }, { "epoch": 0.49, "learning_rate": 1.9104056627757034e-05, "loss": 0.0896, "step": 2067 }, { "epoch": 0.49, "learning_rate": 1.9103007828272034e-05, "loss": 0.0651, "step": 2068 }, { "epoch": 0.49, "learning_rate": 1.910195844410261e-05, "loss": 0.0466, "step": 2069 }, { "epoch": 0.49, "learning_rate": 1.9100908475316164e-05, "loss": 0.1067, "step": 2070 }, { "epoch": 0.49, "learning_rate": 1.9099857921980136e-05, "loss": 0.0517, "step": 2071 }, { "epoch": 0.49, "learning_rate": 1.9098806784162002e-05, "loss": 0.0386, "step": 2072 }, { "epoch": 0.49, "learning_rate": 1.9097755061929275e-05, "loss": 0.0712, "step": 2073 }, { "epoch": 0.49, "learning_rate": 1.9096702755349513e-05, "loss": 0.069, "step": 2074 }, { "epoch": 0.49, "learning_rate": 1.9095649864490294e-05, "loss": 0.0858, "step": 2075 }, { "epoch": 0.49, "learning_rate": 1.9094596389419258e-05, "loss": 0.0498, "step": 2076 }, { "epoch": 0.49, "learning_rate": 1.9093542330204062e-05, "loss": 0.0734, "step": 2077 }, { "epoch": 0.49, "learning_rate": 1.9092487686912405e-05, "loss": 0.1185, "step": 2078 }, { "epoch": 0.49, "learning_rate": 1.9091432459612036e-05, "loss": 0.1035, "step": 2079 }, { "epoch": 0.49, "learning_rate": 1.9090376648370725e-05, "loss": 0.0352, "step": 2080 }, { "epoch": 0.49, "learning_rate": 1.908932025325629e-05, "loss": 0.026, "step": 2081 }, { "epoch": 0.49, "learning_rate": 1.908826327433658e-05, "loss": 0.1613, "step": 2082 }, { "epoch": 0.49, "learning_rate": 1.9087205711679487e-05, "loss": 0.1643, "step": 2083 }, { "epoch": 0.49, "learning_rate": 1.9086147565352936e-05, "loss": 0.0539, "step": 2084 }, { "epoch": 0.49, "learning_rate": 1.9085088835424894e-05, "loss": 0.0544, "step": 2085 }, { "epoch": 0.49, "learning_rate": 1.9084029521963364e-05, "loss": 0.0142, "step": 2086 }, { "epoch": 0.49, "learning_rate": 1.9082969625036378e-05, "loss": 0.0371, "step": 2087 }, { "epoch": 0.49, "learning_rate": 1.9081909144712024e-05, "loss": 0.1087, "step": 2088 }, { "epoch": 0.49, "learning_rate": 1.9080848081058404e-05, "loss": 0.0377, "step": 2089 }, { "epoch": 0.49, "learning_rate": 1.9079786434143683e-05, "loss": 0.0527, "step": 2090 }, { "epoch": 0.49, "learning_rate": 1.9078724204036037e-05, "loss": 0.0494, "step": 2091 }, { "epoch": 0.49, "learning_rate": 1.9077661390803704e-05, "loss": 0.0795, "step": 2092 }, { "epoch": 0.49, "learning_rate": 1.9076597994514947e-05, "loss": 0.0869, "step": 2093 }, { "epoch": 0.49, "learning_rate": 1.9075534015238062e-05, "loss": 0.0803, "step": 2094 }, { "epoch": 0.49, "learning_rate": 1.907446945304139e-05, "loss": 0.1243, "step": 2095 }, { "epoch": 0.49, "learning_rate": 1.907340430799331e-05, "loss": 0.082, "step": 2096 }, { "epoch": 0.49, "learning_rate": 1.907233858016223e-05, "loss": 0.0717, "step": 2097 }, { "epoch": 0.49, "learning_rate": 1.9071272269616613e-05, "loss": 0.0629, "step": 2098 }, { "epoch": 0.49, "learning_rate": 1.907020537642494e-05, "loss": 0.0575, "step": 2099 }, { "epoch": 0.49, "learning_rate": 1.9069137900655735e-05, "loss": 0.0573, "step": 2100 }, { "epoch": 0.49, "learning_rate": 1.906806984237757e-05, "loss": 0.0351, "step": 2101 }, { "epoch": 0.49, "learning_rate": 1.9067001201659038e-05, "loss": 0.0589, "step": 2102 }, { "epoch": 0.49, "learning_rate": 1.906593197856878e-05, "loss": 0.0659, "step": 2103 }, { "epoch": 0.49, "learning_rate": 1.9064862173175473e-05, "loss": 0.0416, "step": 2104 }, { "epoch": 0.49, "learning_rate": 1.9063791785547833e-05, "loss": 0.0772, "step": 2105 }, { "epoch": 0.49, "learning_rate": 1.9062720815754607e-05, "loss": 0.0507, "step": 2106 }, { "epoch": 0.49, "learning_rate": 1.9061649263864585e-05, "loss": 0.1013, "step": 2107 }, { "epoch": 0.49, "learning_rate": 1.906057712994659e-05, "loss": 0.0535, "step": 2108 }, { "epoch": 0.49, "learning_rate": 1.905950441406949e-05, "loss": 0.0535, "step": 2109 }, { "epoch": 0.5, "learning_rate": 1.9058431116302176e-05, "loss": 0.074, "step": 2110 }, { "epoch": 0.5, "learning_rate": 1.90573572367136e-05, "loss": 0.1011, "step": 2111 }, { "epoch": 0.5, "learning_rate": 1.905628277537272e-05, "loss": 0.0898, "step": 2112 }, { "epoch": 0.5, "learning_rate": 1.9055207732348565e-05, "loss": 0.1029, "step": 2113 }, { "epoch": 0.5, "learning_rate": 1.9054132107710172e-05, "loss": 0.0551, "step": 2114 }, { "epoch": 0.5, "learning_rate": 1.9053055901526635e-05, "loss": 0.0802, "step": 2115 }, { "epoch": 0.5, "learning_rate": 1.905197911386708e-05, "loss": 0.0971, "step": 2116 }, { "epoch": 0.5, "learning_rate": 1.905090174480066e-05, "loss": 0.0379, "step": 2117 }, { "epoch": 0.5, "learning_rate": 1.9049823794396583e-05, "loss": 0.0525, "step": 2118 }, { "epoch": 0.5, "learning_rate": 1.9048745262724083e-05, "loss": 0.128, "step": 2119 }, { "epoch": 0.5, "learning_rate": 1.904766614985244e-05, "loss": 0.0579, "step": 2120 }, { "epoch": 0.5, "learning_rate": 1.9046586455850947e-05, "loss": 0.121, "step": 2121 }, { "epoch": 0.5, "learning_rate": 1.904550618078897e-05, "loss": 0.0766, "step": 2122 }, { "epoch": 0.5, "learning_rate": 1.9044425324735886e-05, "loss": 0.0725, "step": 2123 }, { "epoch": 0.5, "learning_rate": 1.9043343887761123e-05, "loss": 0.0458, "step": 2124 }, { "epoch": 0.5, "learning_rate": 1.904226186993414e-05, "loss": 0.0578, "step": 2125 }, { "epoch": 0.5, "learning_rate": 1.9041179271324432e-05, "loss": 0.0613, "step": 2126 }, { "epoch": 0.5, "learning_rate": 1.904009609200154e-05, "loss": 0.0904, "step": 2127 }, { "epoch": 0.5, "learning_rate": 1.9039012332035033e-05, "loss": 0.0442, "step": 2128 }, { "epoch": 0.5, "learning_rate": 1.9037927991494518e-05, "loss": 0.0572, "step": 2129 }, { "epoch": 0.5, "learning_rate": 1.9036843070449646e-05, "loss": 0.0502, "step": 2130 }, { "epoch": 0.5, "learning_rate": 1.90357575689701e-05, "loss": 0.0297, "step": 2131 }, { "epoch": 0.5, "learning_rate": 1.90346714871256e-05, "loss": 0.054, "step": 2132 }, { "epoch": 0.5, "learning_rate": 1.903358482498591e-05, "loss": 0.0764, "step": 2133 }, { "epoch": 0.5, "learning_rate": 1.9032497582620818e-05, "loss": 0.0891, "step": 2134 }, { "epoch": 0.5, "learning_rate": 1.903140976010016e-05, "loss": 0.0469, "step": 2135 }, { "epoch": 0.5, "learning_rate": 1.9030321357493813e-05, "loss": 0.0458, "step": 2136 }, { "epoch": 0.5, "learning_rate": 1.9029232374871678e-05, "loss": 0.0918, "step": 2137 }, { "epoch": 0.5, "learning_rate": 1.9028142812303705e-05, "loss": 0.0931, "step": 2138 }, { "epoch": 0.5, "learning_rate": 1.902705266985987e-05, "loss": 0.0737, "step": 2139 }, { "epoch": 0.5, "learning_rate": 1.90259619476102e-05, "loss": 0.0672, "step": 2140 }, { "epoch": 0.5, "learning_rate": 1.9024870645624744e-05, "loss": 0.0622, "step": 2141 }, { "epoch": 0.5, "learning_rate": 1.9023778763973603e-05, "loss": 0.0154, "step": 2142 }, { "epoch": 0.5, "learning_rate": 1.9022686302726907e-05, "loss": 0.0235, "step": 2143 }, { "epoch": 0.5, "learning_rate": 1.902159326195482e-05, "loss": 0.0389, "step": 2144 }, { "epoch": 0.5, "learning_rate": 1.9020499641727556e-05, "loss": 0.0439, "step": 2145 }, { "epoch": 0.5, "learning_rate": 1.901940544211535e-05, "loss": 0.0653, "step": 2146 }, { "epoch": 0.5, "learning_rate": 1.9018310663188483e-05, "loss": 0.0831, "step": 2147 }, { "epoch": 0.5, "learning_rate": 1.901721530501728e-05, "loss": 0.024, "step": 2148 }, { "epoch": 0.5, "learning_rate": 1.9016119367672087e-05, "loss": 0.1089, "step": 2149 }, { "epoch": 0.5, "learning_rate": 1.90150228512233e-05, "loss": 0.0736, "step": 2150 }, { "epoch": 0.5, "learning_rate": 1.901392575574135e-05, "loss": 0.0881, "step": 2151 }, { "epoch": 0.51, "learning_rate": 1.9012828081296693e-05, "loss": 0.0822, "step": 2152 }, { "epoch": 0.51, "learning_rate": 1.9011729827959845e-05, "loss": 0.0131, "step": 2153 }, { "epoch": 0.51, "learning_rate": 1.901063099580134e-05, "loss": 0.1213, "step": 2154 }, { "epoch": 0.51, "learning_rate": 1.900953158489176e-05, "loss": 0.0615, "step": 2155 }, { "epoch": 0.51, "learning_rate": 1.9008431595301715e-05, "loss": 0.0238, "step": 2156 }, { "epoch": 0.51, "learning_rate": 1.900733102710186e-05, "loss": 0.0137, "step": 2157 }, { "epoch": 0.51, "learning_rate": 1.9006229880362884e-05, "loss": 0.0639, "step": 2158 }, { "epoch": 0.51, "learning_rate": 1.9005128155155513e-05, "loss": 0.0558, "step": 2159 }, { "epoch": 0.51, "learning_rate": 1.900402585155051e-05, "loss": 0.1371, "step": 2160 }, { "epoch": 0.51, "learning_rate": 1.900292296961868e-05, "loss": 0.109, "step": 2161 }, { "epoch": 0.51, "learning_rate": 1.9001819509430853e-05, "loss": 0.1148, "step": 2162 }, { "epoch": 0.51, "learning_rate": 1.9000715471057912e-05, "loss": 0.0443, "step": 2163 }, { "epoch": 0.51, "learning_rate": 1.8999610854570762e-05, "loss": 0.0612, "step": 2164 }, { "epoch": 0.51, "learning_rate": 1.8998505660040356e-05, "loss": 0.0261, "step": 2165 }, { "epoch": 0.51, "learning_rate": 1.8997399887537683e-05, "loss": 0.0426, "step": 2166 }, { "epoch": 0.51, "learning_rate": 1.8996293537133764e-05, "loss": 0.0273, "step": 2167 }, { "epoch": 0.51, "learning_rate": 1.8995186608899655e-05, "loss": 0.0653, "step": 2168 }, { "epoch": 0.51, "learning_rate": 1.899407910290646e-05, "loss": 0.0173, "step": 2169 }, { "epoch": 0.51, "learning_rate": 1.8992971019225317e-05, "loss": 0.0505, "step": 2170 }, { "epoch": 0.51, "learning_rate": 1.899186235792739e-05, "loss": 0.0501, "step": 2171 }, { "epoch": 0.51, "learning_rate": 1.899075311908389e-05, "loss": 0.0486, "step": 2172 }, { "epoch": 0.51, "learning_rate": 1.8989643302766065e-05, "loss": 0.0222, "step": 2173 }, { "epoch": 0.51, "learning_rate": 1.8988532909045195e-05, "loss": 0.1582, "step": 2174 }, { "epoch": 0.51, "learning_rate": 1.8987421937992608e-05, "loss": 0.1369, "step": 2175 }, { "epoch": 0.51, "learning_rate": 1.8986310389679658e-05, "loss": 0.0858, "step": 2176 }, { "epoch": 0.51, "learning_rate": 1.898519826417773e-05, "loss": 0.0489, "step": 2177 }, { "epoch": 0.51, "learning_rate": 1.8984085561558272e-05, "loss": 0.0557, "step": 2178 }, { "epoch": 0.51, "learning_rate": 1.8982972281892742e-05, "loss": 0.0606, "step": 2179 }, { "epoch": 0.51, "learning_rate": 1.8981858425252648e-05, "loss": 0.0661, "step": 2180 }, { "epoch": 0.51, "learning_rate": 1.8980743991709533e-05, "loss": 0.0399, "step": 2181 }, { "epoch": 0.51, "learning_rate": 1.897962898133497e-05, "loss": 0.0698, "step": 2182 }, { "epoch": 0.51, "learning_rate": 1.897851339420059e-05, "loss": 0.0615, "step": 2183 }, { "epoch": 0.51, "learning_rate": 1.897739723037804e-05, "loss": 0.0764, "step": 2184 }, { "epoch": 0.51, "learning_rate": 1.897628048993901e-05, "loss": 0.0314, "step": 2185 }, { "epoch": 0.51, "learning_rate": 1.8975163172955227e-05, "loss": 0.1568, "step": 2186 }, { "epoch": 0.51, "learning_rate": 1.8974045279498458e-05, "loss": 0.1307, "step": 2187 }, { "epoch": 0.51, "learning_rate": 1.8972926809640505e-05, "loss": 0.0475, "step": 2188 }, { "epoch": 0.51, "learning_rate": 1.8971807763453207e-05, "loss": 0.0561, "step": 2189 }, { "epoch": 0.51, "learning_rate": 1.897068814100844e-05, "loss": 0.0914, "step": 2190 }, { "epoch": 0.51, "learning_rate": 1.8969567942378115e-05, "loss": 0.0569, "step": 2191 }, { "epoch": 0.51, "learning_rate": 1.8968447167634188e-05, "loss": 0.1338, "step": 2192 }, { "epoch": 0.51, "learning_rate": 1.896732581684864e-05, "loss": 0.0723, "step": 2193 }, { "epoch": 0.51, "learning_rate": 1.8966203890093496e-05, "loss": 0.0435, "step": 2194 }, { "epoch": 0.52, "learning_rate": 1.8965081387440826e-05, "loss": 0.0878, "step": 2195 }, { "epoch": 0.52, "learning_rate": 1.8963958308962713e-05, "loss": 0.0584, "step": 2196 }, { "epoch": 0.52, "learning_rate": 1.8962834654731305e-05, "loss": 0.036, "step": 2197 }, { "epoch": 0.52, "learning_rate": 1.8961710424818766e-05, "loss": 0.0347, "step": 2198 }, { "epoch": 0.52, "learning_rate": 1.896058561929731e-05, "loss": 0.0736, "step": 2199 }, { "epoch": 0.52, "learning_rate": 1.8959460238239178e-05, "loss": 0.0567, "step": 2200 }, { "epoch": 0.52, "learning_rate": 1.895833428171666e-05, "loss": 0.062, "step": 2201 }, { "epoch": 0.52, "learning_rate": 1.8957207749802067e-05, "loss": 0.0228, "step": 2202 }, { "epoch": 0.52, "learning_rate": 1.8956080642567764e-05, "loss": 0.0579, "step": 2203 }, { "epoch": 0.52, "learning_rate": 1.8954952960086143e-05, "loss": 0.0152, "step": 2204 }, { "epoch": 0.52, "learning_rate": 1.8953824702429634e-05, "loss": 0.0948, "step": 2205 }, { "epoch": 0.52, "learning_rate": 1.89526958696707e-05, "loss": 0.0731, "step": 2206 }, { "epoch": 0.52, "learning_rate": 1.8951566461881855e-05, "loss": 0.0449, "step": 2207 }, { "epoch": 0.52, "learning_rate": 1.8950436479135638e-05, "loss": 0.2058, "step": 2208 }, { "epoch": 0.52, "learning_rate": 1.8949305921504618e-05, "loss": 0.0149, "step": 2209 }, { "epoch": 0.52, "learning_rate": 1.894817478906142e-05, "loss": 0.0315, "step": 2210 }, { "epoch": 0.52, "learning_rate": 1.89470430818787e-05, "loss": 0.1059, "step": 2211 }, { "epoch": 0.52, "learning_rate": 1.8945910800029136e-05, "loss": 0.0431, "step": 2212 }, { "epoch": 0.52, "learning_rate": 1.8944777943585463e-05, "loss": 0.0751, "step": 2213 }, { "epoch": 0.52, "learning_rate": 1.894364451262044e-05, "loss": 0.0628, "step": 2214 }, { "epoch": 0.52, "learning_rate": 1.8942510507206867e-05, "loss": 0.1438, "step": 2215 }, { "epoch": 0.52, "learning_rate": 1.8941375927417583e-05, "loss": 0.1617, "step": 2216 }, { "epoch": 0.52, "learning_rate": 1.8940240773325463e-05, "loss": 0.0991, "step": 2217 }, { "epoch": 0.52, "learning_rate": 1.8939105045003413e-05, "loss": 0.0259, "step": 2218 }, { "epoch": 0.52, "learning_rate": 1.8937968742524382e-05, "loss": 0.0755, "step": 2219 }, { "epoch": 0.52, "learning_rate": 1.8936831865961358e-05, "loss": 0.1666, "step": 2220 }, { "epoch": 0.52, "learning_rate": 1.893569441538736e-05, "loss": 0.1574, "step": 2221 }, { "epoch": 0.52, "learning_rate": 1.8934556390875445e-05, "loss": 0.0677, "step": 2222 }, { "epoch": 0.52, "learning_rate": 1.893341779249871e-05, "loss": 0.0985, "step": 2223 }, { "epoch": 0.52, "learning_rate": 1.8932278620330286e-05, "loss": 0.0451, "step": 2224 }, { "epoch": 0.52, "learning_rate": 1.8931138874443343e-05, "loss": 0.1107, "step": 2225 }, { "epoch": 0.52, "learning_rate": 1.8929998554911085e-05, "loss": 0.1654, "step": 2226 }, { "epoch": 0.52, "learning_rate": 1.8928857661806757e-05, "loss": 0.0899, "step": 2227 }, { "epoch": 0.52, "learning_rate": 1.8927716195203637e-05, "loss": 0.0598, "step": 2228 }, { "epoch": 0.52, "learning_rate": 1.8926574155175038e-05, "loss": 0.0601, "step": 2229 }, { "epoch": 0.52, "learning_rate": 1.892543154179432e-05, "loss": 0.0236, "step": 2230 }, { "epoch": 0.52, "learning_rate": 1.892428835513486e-05, "loss": 0.0559, "step": 2231 }, { "epoch": 0.52, "learning_rate": 1.8923144595270102e-05, "loss": 0.0494, "step": 2232 }, { "epoch": 0.52, "learning_rate": 1.89220002622735e-05, "loss": 0.1051, "step": 2233 }, { "epoch": 0.52, "learning_rate": 1.892085535621855e-05, "loss": 0.0831, "step": 2234 }, { "epoch": 0.52, "learning_rate": 1.89197098771788e-05, "loss": 0.0271, "step": 2235 }, { "epoch": 0.52, "learning_rate": 1.8918563825227816e-05, "loss": 0.0475, "step": 2236 }, { "epoch": 0.53, "learning_rate": 1.891741720043921e-05, "loss": 0.0523, "step": 2237 }, { "epoch": 0.53, "learning_rate": 1.8916270002886632e-05, "loss": 0.087, "step": 2238 }, { "epoch": 0.53, "learning_rate": 1.891512223264376e-05, "loss": 0.0846, "step": 2239 }, { "epoch": 0.53, "learning_rate": 1.8913973889784325e-05, "loss": 0.0293, "step": 2240 }, { "epoch": 0.53, "learning_rate": 1.891282497438208e-05, "loss": 0.0679, "step": 2241 }, { "epoch": 0.53, "learning_rate": 1.8911675486510814e-05, "loss": 0.0201, "step": 2242 }, { "epoch": 0.53, "learning_rate": 1.8910525426244367e-05, "loss": 0.0495, "step": 2243 }, { "epoch": 0.53, "learning_rate": 1.89093747936566e-05, "loss": 0.0721, "step": 2244 }, { "epoch": 0.53, "learning_rate": 1.8908223588821424e-05, "loss": 0.1111, "step": 2245 }, { "epoch": 0.53, "learning_rate": 1.890707181181278e-05, "loss": 0.0523, "step": 2246 }, { "epoch": 0.53, "learning_rate": 1.8905919462704643e-05, "loss": 0.0815, "step": 2247 }, { "epoch": 0.53, "learning_rate": 1.890476654157103e-05, "loss": 0.0819, "step": 2248 }, { "epoch": 0.53, "learning_rate": 1.890361304848599e-05, "loss": 0.0534, "step": 2249 }, { "epoch": 0.53, "learning_rate": 1.890245898352362e-05, "loss": 0.0782, "step": 2250 }, { "epoch": 0.53, "learning_rate": 1.8901304346758035e-05, "loss": 0.0424, "step": 2251 }, { "epoch": 0.53, "learning_rate": 1.8900149138263403e-05, "loss": 0.1947, "step": 2252 }, { "epoch": 0.53, "learning_rate": 1.889899335811392e-05, "loss": 0.0973, "step": 2253 }, { "epoch": 0.53, "learning_rate": 1.8897837006383825e-05, "loss": 0.0366, "step": 2254 }, { "epoch": 0.53, "learning_rate": 1.8896680083147387e-05, "loss": 0.1447, "step": 2255 }, { "epoch": 0.53, "learning_rate": 1.8895522588478917e-05, "loss": 0.0314, "step": 2256 }, { "epoch": 0.53, "learning_rate": 1.889436452245276e-05, "loss": 0.032, "step": 2257 }, { "epoch": 0.53, "learning_rate": 1.88932058851433e-05, "loss": 0.1024, "step": 2258 }, { "epoch": 0.53, "learning_rate": 1.8892046676624952e-05, "loss": 0.064, "step": 2259 }, { "epoch": 0.53, "learning_rate": 1.8890886896972174e-05, "loss": 0.0527, "step": 2260 }, { "epoch": 0.53, "learning_rate": 1.888972654625946e-05, "loss": 0.0128, "step": 2261 }, { "epoch": 0.53, "learning_rate": 1.8888565624561334e-05, "loss": 0.0132, "step": 2262 }, { "epoch": 0.53, "learning_rate": 1.888740413195237e-05, "loss": 0.0738, "step": 2263 }, { "epoch": 0.53, "learning_rate": 1.8886242068507165e-05, "loss": 0.0428, "step": 2264 }, { "epoch": 0.53, "learning_rate": 1.8885079434300357e-05, "loss": 0.0359, "step": 2265 }, { "epoch": 0.53, "learning_rate": 1.8883916229406627e-05, "loss": 0.0534, "step": 2266 }, { "epoch": 0.53, "learning_rate": 1.888275245390068e-05, "loss": 0.0551, "step": 2267 }, { "epoch": 0.53, "learning_rate": 1.8881588107857272e-05, "loss": 0.1125, "step": 2268 }, { "epoch": 0.53, "learning_rate": 1.8880423191351186e-05, "loss": 0.174, "step": 2269 }, { "epoch": 0.53, "learning_rate": 1.8879257704457244e-05, "loss": 0.1551, "step": 2270 }, { "epoch": 0.53, "learning_rate": 1.8878091647250303e-05, "loss": 0.067, "step": 2271 }, { "epoch": 0.53, "learning_rate": 1.8876925019805264e-05, "loss": 0.0963, "step": 2272 }, { "epoch": 0.53, "learning_rate": 1.8875757822197052e-05, "loss": 0.0459, "step": 2273 }, { "epoch": 0.53, "learning_rate": 1.8874590054500644e-05, "loss": 0.0464, "step": 2274 }, { "epoch": 0.53, "learning_rate": 1.8873421716791042e-05, "loss": 0.1267, "step": 2275 }, { "epoch": 0.53, "learning_rate": 1.8872252809143284e-05, "loss": 0.0655, "step": 2276 }, { "epoch": 0.53, "learning_rate": 1.8871083331632457e-05, "loss": 0.0812, "step": 2277 }, { "epoch": 0.53, "learning_rate": 1.886991328433367e-05, "loss": 0.0857, "step": 2278 }, { "epoch": 0.53, "learning_rate": 1.8868742667322073e-05, "loss": 0.044, "step": 2279 }, { "epoch": 0.54, "learning_rate": 1.886757148067286e-05, "loss": 0.0884, "step": 2280 }, { "epoch": 0.54, "learning_rate": 1.8866399724461256e-05, "loss": 0.014, "step": 2281 }, { "epoch": 0.54, "learning_rate": 1.886522739876252e-05, "loss": 0.1566, "step": 2282 }, { "epoch": 0.54, "learning_rate": 1.8864054503651952e-05, "loss": 0.156, "step": 2283 }, { "epoch": 0.54, "learning_rate": 1.8862881039204883e-05, "loss": 0.136, "step": 2284 }, { "epoch": 0.54, "learning_rate": 1.886170700549669e-05, "loss": 0.1871, "step": 2285 }, { "epoch": 0.54, "learning_rate": 1.8860532402602777e-05, "loss": 0.0623, "step": 2286 }, { "epoch": 0.54, "learning_rate": 1.885935723059859e-05, "loss": 0.103, "step": 2287 }, { "epoch": 0.54, "learning_rate": 1.8858181489559612e-05, "loss": 0.0957, "step": 2288 }, { "epoch": 0.54, "learning_rate": 1.8857005179561355e-05, "loss": 0.1298, "step": 2289 }, { "epoch": 0.54, "learning_rate": 1.885582830067938e-05, "loss": 0.066, "step": 2290 }, { "epoch": 0.54, "learning_rate": 1.885465085298927e-05, "loss": 0.1049, "step": 2291 }, { "epoch": 0.54, "learning_rate": 1.885347283656666e-05, "loss": 0.0398, "step": 2292 }, { "epoch": 0.54, "learning_rate": 1.8852294251487212e-05, "loss": 0.0736, "step": 2293 }, { "epoch": 0.54, "learning_rate": 1.885111509782662e-05, "loss": 0.0702, "step": 2294 }, { "epoch": 0.54, "learning_rate": 1.8849935375660626e-05, "loss": 0.1106, "step": 2295 }, { "epoch": 0.54, "learning_rate": 1.8848755085065007e-05, "loss": 0.0977, "step": 2296 }, { "epoch": 0.54, "learning_rate": 1.8847574226115565e-05, "loss": 0.077, "step": 2297 }, { "epoch": 0.54, "learning_rate": 1.884639279888815e-05, "loss": 0.0752, "step": 2298 }, { "epoch": 0.54, "learning_rate": 1.8845210803458646e-05, "loss": 0.0491, "step": 2299 }, { "epoch": 0.54, "learning_rate": 1.884402823990297e-05, "loss": 0.0793, "step": 2300 }, { "epoch": 0.54, "learning_rate": 1.884284510829708e-05, "loss": 0.0742, "step": 2301 }, { "epoch": 0.54, "learning_rate": 1.8841661408716968e-05, "loss": 0.132, "step": 2302 }, { "epoch": 0.54, "learning_rate": 1.8840477141238662e-05, "loss": 0.0673, "step": 2303 }, { "epoch": 0.54, "learning_rate": 1.8839292305938227e-05, "loss": 0.0594, "step": 2304 }, { "epoch": 0.54, "learning_rate": 1.8838106902891763e-05, "loss": 0.0326, "step": 2305 }, { "epoch": 0.54, "learning_rate": 1.8836920932175412e-05, "loss": 0.0511, "step": 2306 }, { "epoch": 0.54, "learning_rate": 1.8835734393865346e-05, "loss": 0.0469, "step": 2307 }, { "epoch": 0.54, "learning_rate": 1.8834547288037778e-05, "loss": 0.0823, "step": 2308 }, { "epoch": 0.54, "learning_rate": 1.8833359614768957e-05, "loss": 0.0857, "step": 2309 }, { "epoch": 0.54, "learning_rate": 1.883217137413516e-05, "loss": 0.0807, "step": 2310 }, { "epoch": 0.54, "learning_rate": 1.8830982566212713e-05, "loss": 0.0475, "step": 2311 }, { "epoch": 0.54, "learning_rate": 1.8829793191077973e-05, "loss": 0.0451, "step": 2312 }, { "epoch": 0.54, "learning_rate": 1.8828603248807335e-05, "loss": 0.085, "step": 2313 }, { "epoch": 0.54, "learning_rate": 1.8827412739477224e-05, "loss": 0.0654, "step": 2314 }, { "epoch": 0.54, "learning_rate": 1.8826221663164107e-05, "loss": 0.0375, "step": 2315 }, { "epoch": 0.54, "learning_rate": 1.882503001994449e-05, "loss": 0.0688, "step": 2316 }, { "epoch": 0.54, "learning_rate": 1.882383780989491e-05, "loss": 0.0607, "step": 2317 }, { "epoch": 0.54, "learning_rate": 1.882264503309194e-05, "loss": 0.1532, "step": 2318 }, { "epoch": 0.54, "learning_rate": 1.8821451689612194e-05, "loss": 0.0383, "step": 2319 }, { "epoch": 0.54, "learning_rate": 1.882025777953232e-05, "loss": 0.0258, "step": 2320 }, { "epoch": 0.54, "learning_rate": 1.881906330292901e-05, "loss": 0.0418, "step": 2321 }, { "epoch": 0.54, "learning_rate": 1.8817868259878972e-05, "loss": 0.0641, "step": 2322 }, { "epoch": 0.55, "learning_rate": 1.881667265045897e-05, "loss": 0.0731, "step": 2323 }, { "epoch": 0.55, "learning_rate": 1.8815476474745796e-05, "loss": 0.1108, "step": 2324 }, { "epoch": 0.55, "learning_rate": 1.8814279732816283e-05, "loss": 0.0445, "step": 2325 }, { "epoch": 0.55, "learning_rate": 1.8813082424747296e-05, "loss": 0.075, "step": 2326 }, { "epoch": 0.55, "learning_rate": 1.8811884550615737e-05, "loss": 0.0328, "step": 2327 }, { "epoch": 0.55, "learning_rate": 1.881068611049855e-05, "loss": 0.0724, "step": 2328 }, { "epoch": 0.55, "learning_rate": 1.8809487104472703e-05, "loss": 0.0357, "step": 2329 }, { "epoch": 0.55, "learning_rate": 1.8808287532615207e-05, "loss": 0.0367, "step": 2330 }, { "epoch": 0.55, "learning_rate": 1.8807087395003118e-05, "loss": 0.0404, "step": 2331 }, { "epoch": 0.55, "learning_rate": 1.8805886691713516e-05, "loss": 0.0369, "step": 2332 }, { "epoch": 0.55, "learning_rate": 1.8804685422823528e-05, "loss": 0.0601, "step": 2333 }, { "epoch": 0.55, "learning_rate": 1.8803483588410303e-05, "loss": 0.0401, "step": 2334 }, { "epoch": 0.55, "learning_rate": 1.8802281188551038e-05, "loss": 0.0438, "step": 2335 }, { "epoch": 0.55, "learning_rate": 1.8801078223322962e-05, "loss": 0.1067, "step": 2336 }, { "epoch": 0.55, "learning_rate": 1.8799874692803347e-05, "loss": 0.0712, "step": 2337 }, { "epoch": 0.55, "learning_rate": 1.8798670597069487e-05, "loss": 0.0998, "step": 2338 }, { "epoch": 0.55, "learning_rate": 1.8797465936198725e-05, "loss": 0.0504, "step": 2339 }, { "epoch": 0.55, "learning_rate": 1.879626071026843e-05, "loss": 0.0282, "step": 2340 }, { "epoch": 0.55, "learning_rate": 1.879505491935603e-05, "loss": 0.0947, "step": 2341 }, { "epoch": 0.55, "learning_rate": 1.8793848563538956e-05, "loss": 0.0567, "step": 2342 }, { "epoch": 0.55, "learning_rate": 1.87926416428947e-05, "loss": 0.073, "step": 2343 }, { "epoch": 0.55, "learning_rate": 1.8791434157500776e-05, "loss": 0.0316, "step": 2344 }, { "epoch": 0.55, "learning_rate": 1.8790226107434745e-05, "loss": 0.0438, "step": 2345 }, { "epoch": 0.55, "learning_rate": 1.8789017492774203e-05, "loss": 0.1242, "step": 2346 }, { "epoch": 0.55, "learning_rate": 1.8787808313596773e-05, "loss": 0.0775, "step": 2347 }, { "epoch": 0.55, "learning_rate": 1.8786598569980127e-05, "loss": 0.0857, "step": 2348 }, { "epoch": 0.55, "learning_rate": 1.8785388262001956e-05, "loss": 0.1452, "step": 2349 }, { "epoch": 0.55, "learning_rate": 1.878417738974001e-05, "loss": 0.0953, "step": 2350 }, { "epoch": 0.55, "learning_rate": 1.8782965953272055e-05, "loss": 0.0525, "step": 2351 }, { "epoch": 0.55, "learning_rate": 1.8781753952675904e-05, "loss": 0.0816, "step": 2352 }, { "epoch": 0.55, "learning_rate": 1.87805413880294e-05, "loss": 0.0553, "step": 2353 }, { "epoch": 0.55, "learning_rate": 1.8779328259410435e-05, "loss": 0.0529, "step": 2354 }, { "epoch": 0.55, "learning_rate": 1.877811456689692e-05, "loss": 0.0599, "step": 2355 }, { "epoch": 0.55, "learning_rate": 1.8776900310566812e-05, "loss": 0.056, "step": 2356 }, { "epoch": 0.55, "learning_rate": 1.87756854904981e-05, "loss": 0.035, "step": 2357 }, { "epoch": 0.55, "learning_rate": 1.8774470106768817e-05, "loss": 0.1129, "step": 2358 }, { "epoch": 0.55, "learning_rate": 1.8773254159457025e-05, "loss": 0.0378, "step": 2359 }, { "epoch": 0.55, "learning_rate": 1.8772037648640823e-05, "loss": 0.1022, "step": 2360 }, { "epoch": 0.55, "learning_rate": 1.877082057439835e-05, "loss": 0.1118, "step": 2361 }, { "epoch": 0.55, "learning_rate": 1.876960293680777e-05, "loss": 0.0871, "step": 2362 }, { "epoch": 0.55, "learning_rate": 1.8768384735947305e-05, "loss": 0.0403, "step": 2363 }, { "epoch": 0.55, "learning_rate": 1.876716597189519e-05, "loss": 0.078, "step": 2364 }, { "epoch": 0.56, "learning_rate": 1.8765946644729704e-05, "loss": 0.0293, "step": 2365 }, { "epoch": 0.56, "learning_rate": 1.876472675452917e-05, "loss": 0.1608, "step": 2366 }, { "epoch": 0.56, "learning_rate": 1.8763506301371945e-05, "loss": 0.0184, "step": 2367 }, { "epoch": 0.56, "learning_rate": 1.8762285285336408e-05, "loss": 0.0755, "step": 2368 }, { "epoch": 0.56, "learning_rate": 1.8761063706500994e-05, "loss": 0.0259, "step": 2369 }, { "epoch": 0.56, "learning_rate": 1.875984156494416e-05, "loss": 0.0391, "step": 2370 }, { "epoch": 0.56, "learning_rate": 1.87586188607444e-05, "loss": 0.1371, "step": 2371 }, { "epoch": 0.56, "learning_rate": 1.8757395593980263e-05, "loss": 0.0555, "step": 2372 }, { "epoch": 0.56, "learning_rate": 1.87561717647303e-05, "loss": 0.0778, "step": 2373 }, { "epoch": 0.56, "learning_rate": 1.8754947373073128e-05, "loss": 0.1096, "step": 2374 }, { "epoch": 0.56, "learning_rate": 1.875372241908739e-05, "loss": 0.0198, "step": 2375 }, { "epoch": 0.56, "learning_rate": 1.875249690285176e-05, "loss": 0.0579, "step": 2376 }, { "epoch": 0.56, "learning_rate": 1.8751270824444953e-05, "loss": 0.0749, "step": 2377 }, { "epoch": 0.56, "learning_rate": 1.8750044183945726e-05, "loss": 0.02, "step": 2378 }, { "epoch": 0.56, "learning_rate": 1.874881698143286e-05, "loss": 0.0536, "step": 2379 }, { "epoch": 0.56, "learning_rate": 1.874758921698518e-05, "loss": 0.1078, "step": 2380 }, { "epoch": 0.56, "learning_rate": 1.8746360890681546e-05, "loss": 0.1103, "step": 2381 }, { "epoch": 0.56, "learning_rate": 1.874513200260085e-05, "loss": 0.0212, "step": 2382 }, { "epoch": 0.56, "learning_rate": 1.8743902552822024e-05, "loss": 0.0651, "step": 2383 }, { "epoch": 0.56, "learning_rate": 1.874267254142404e-05, "loss": 0.1861, "step": 2384 }, { "epoch": 0.56, "learning_rate": 1.8741441968485893e-05, "loss": 0.0693, "step": 2385 }, { "epoch": 0.56, "learning_rate": 1.8740210834086633e-05, "loss": 0.082, "step": 2386 }, { "epoch": 0.56, "learning_rate": 1.8738979138305322e-05, "loss": 0.0335, "step": 2387 }, { "epoch": 0.56, "learning_rate": 1.8737746881221085e-05, "loss": 0.0438, "step": 2388 }, { "epoch": 0.56, "learning_rate": 1.873651406291306e-05, "loss": 0.0193, "step": 2389 }, { "epoch": 0.56, "learning_rate": 1.8735280683460438e-05, "loss": 0.0353, "step": 2390 }, { "epoch": 0.56, "learning_rate": 1.8734046742942437e-05, "loss": 0.0906, "step": 2391 }, { "epoch": 0.56, "learning_rate": 1.8732812241438306e-05, "loss": 0.0344, "step": 2392 }, { "epoch": 0.56, "learning_rate": 1.8731577179027348e-05, "loss": 0.0473, "step": 2393 }, { "epoch": 0.56, "learning_rate": 1.8730341555788883e-05, "loss": 0.0967, "step": 2394 }, { "epoch": 0.56, "learning_rate": 1.8729105371802273e-05, "loss": 0.0579, "step": 2395 }, { "epoch": 0.56, "learning_rate": 1.8727868627146924e-05, "loss": 0.0769, "step": 2396 }, { "epoch": 0.56, "learning_rate": 1.872663132190227e-05, "loss": 0.102, "step": 2397 }, { "epoch": 0.56, "learning_rate": 1.872539345614778e-05, "loss": 0.0579, "step": 2398 }, { "epoch": 0.56, "learning_rate": 1.8724155029962963e-05, "loss": 0.0456, "step": 2399 }, { "epoch": 0.56, "learning_rate": 1.8722916043427366e-05, "loss": 0.0208, "step": 2400 }, { "epoch": 0.56, "learning_rate": 1.872167649662057e-05, "loss": 0.085, "step": 2401 }, { "epoch": 0.56, "learning_rate": 1.8720436389622182e-05, "loss": 0.0143, "step": 2402 }, { "epoch": 0.56, "learning_rate": 1.8719195722511862e-05, "loss": 0.0567, "step": 2403 }, { "epoch": 0.56, "learning_rate": 1.8717954495369296e-05, "loss": 0.0223, "step": 2404 }, { "epoch": 0.56, "learning_rate": 1.8716712708274208e-05, "loss": 0.0836, "step": 2405 }, { "epoch": 0.56, "learning_rate": 1.8715470361306354e-05, "loss": 0.0716, "step": 2406 }, { "epoch": 0.56, "learning_rate": 1.8714227454545532e-05, "loss": 0.0554, "step": 2407 }, { "epoch": 0.57, "learning_rate": 1.8712983988071575e-05, "loss": 0.0206, "step": 2408 }, { "epoch": 0.57, "learning_rate": 1.8711739961964353e-05, "loss": 0.0576, "step": 2409 }, { "epoch": 0.57, "learning_rate": 1.8710495376303765e-05, "loss": 0.1407, "step": 2410 }, { "epoch": 0.57, "learning_rate": 1.870925023116975e-05, "loss": 0.044, "step": 2411 }, { "epoch": 0.57, "learning_rate": 1.8708004526642286e-05, "loss": 0.016, "step": 2412 }, { "epoch": 0.57, "learning_rate": 1.870675826280138e-05, "loss": 0.0956, "step": 2413 }, { "epoch": 0.57, "learning_rate": 1.8705511439727088e-05, "loss": 0.0806, "step": 2414 }, { "epoch": 0.57, "learning_rate": 1.8704264057499488e-05, "loss": 0.0453, "step": 2415 }, { "epoch": 0.57, "learning_rate": 1.8703016116198698e-05, "loss": 0.0358, "step": 2416 }, { "epoch": 0.57, "learning_rate": 1.8701767615904874e-05, "loss": 0.0692, "step": 2417 }, { "epoch": 0.57, "learning_rate": 1.8700518556698206e-05, "loss": 0.06, "step": 2418 }, { "epoch": 0.57, "learning_rate": 1.8699268938658925e-05, "loss": 0.0133, "step": 2419 }, { "epoch": 0.57, "learning_rate": 1.869801876186729e-05, "loss": 0.034, "step": 2420 }, { "epoch": 0.57, "learning_rate": 1.86967680264036e-05, "loss": 0.033, "step": 2421 }, { "epoch": 0.57, "learning_rate": 1.8695516732348187e-05, "loss": 0.0863, "step": 2422 }, { "epoch": 0.57, "learning_rate": 1.8694264879781426e-05, "loss": 0.0356, "step": 2423 }, { "epoch": 0.57, "learning_rate": 1.8693012468783722e-05, "loss": 0.0592, "step": 2424 }, { "epoch": 0.57, "learning_rate": 1.869175949943552e-05, "loss": 0.048, "step": 2425 }, { "epoch": 0.57, "learning_rate": 1.869050597181729e-05, "loss": 0.1188, "step": 2426 }, { "epoch": 0.57, "learning_rate": 1.8689251886009548e-05, "loss": 0.0847, "step": 2427 }, { "epoch": 0.57, "learning_rate": 1.868799724209285e-05, "loss": 0.052, "step": 2428 }, { "epoch": 0.57, "learning_rate": 1.868674204014778e-05, "loss": 0.0285, "step": 2429 }, { "epoch": 0.57, "learning_rate": 1.8685486280254953e-05, "loss": 0.0691, "step": 2430 }, { "epoch": 0.57, "learning_rate": 1.868422996249503e-05, "loss": 0.0546, "step": 2431 }, { "epoch": 0.57, "learning_rate": 1.8682973086948708e-05, "loss": 0.1261, "step": 2432 }, { "epoch": 0.57, "learning_rate": 1.868171565369671e-05, "loss": 0.0488, "step": 2433 }, { "epoch": 0.57, "learning_rate": 1.8680457662819803e-05, "loss": 0.0612, "step": 2434 }, { "epoch": 0.57, "learning_rate": 1.8679199114398785e-05, "loss": 0.097, "step": 2435 }, { "epoch": 0.57, "learning_rate": 1.8677940008514496e-05, "loss": 0.0581, "step": 2436 }, { "epoch": 0.57, "learning_rate": 1.867668034524781e-05, "loss": 0.0575, "step": 2437 }, { "epoch": 0.57, "learning_rate": 1.8675420124679627e-05, "loss": 0.0702, "step": 2438 }, { "epoch": 0.57, "learning_rate": 1.8674159346890898e-05, "loss": 0.0576, "step": 2439 }, { "epoch": 0.57, "learning_rate": 1.86728980119626e-05, "loss": 0.2128, "step": 2440 }, { "epoch": 0.57, "learning_rate": 1.8671636119975747e-05, "loss": 0.0301, "step": 2441 }, { "epoch": 0.57, "learning_rate": 1.8670373671011392e-05, "loss": 0.078, "step": 2442 }, { "epoch": 0.57, "learning_rate": 1.866911066515062e-05, "loss": 0.1288, "step": 2443 }, { "epoch": 0.57, "learning_rate": 1.8667847102474557e-05, "loss": 0.0705, "step": 2444 }, { "epoch": 0.57, "learning_rate": 1.8666582983064357e-05, "loss": 0.0657, "step": 2445 }, { "epoch": 0.57, "learning_rate": 1.866531830700122e-05, "loss": 0.0534, "step": 2446 }, { "epoch": 0.57, "learning_rate": 1.8664053074366367e-05, "loss": 0.0779, "step": 2447 }, { "epoch": 0.57, "learning_rate": 1.8662787285241074e-05, "loss": 0.0438, "step": 2448 }, { "epoch": 0.57, "learning_rate": 1.8661520939706634e-05, "loss": 0.0115, "step": 2449 }, { "epoch": 0.57, "learning_rate": 1.866025403784439e-05, "loss": 0.0213, "step": 2450 }, { "epoch": 0.58, "learning_rate": 1.865898657973571e-05, "loss": 0.0826, "step": 2451 }, { "epoch": 0.58, "learning_rate": 1.8657718565462003e-05, "loss": 0.0727, "step": 2452 }, { "epoch": 0.58, "learning_rate": 1.865644999510472e-05, "loss": 0.0698, "step": 2453 }, { "epoch": 0.58, "learning_rate": 1.865518086874533e-05, "loss": 0.1519, "step": 2454 }, { "epoch": 0.58, "learning_rate": 1.865391118646536e-05, "loss": 0.0694, "step": 2455 }, { "epoch": 0.58, "learning_rate": 1.8652640948346356e-05, "loss": 0.097, "step": 2456 }, { "epoch": 0.58, "learning_rate": 1.8651370154469902e-05, "loss": 0.0433, "step": 2457 }, { "epoch": 0.58, "learning_rate": 1.865009880491763e-05, "loss": 0.0641, "step": 2458 }, { "epoch": 0.58, "learning_rate": 1.8648826899771188e-05, "loss": 0.1016, "step": 2459 }, { "epoch": 0.58, "learning_rate": 1.8647554439112276e-05, "loss": 0.0558, "step": 2460 }, { "epoch": 0.58, "learning_rate": 1.8646281423022622e-05, "loss": 0.0643, "step": 2461 }, { "epoch": 0.58, "learning_rate": 1.8645007851583995e-05, "loss": 0.0478, "step": 2462 }, { "epoch": 0.58, "learning_rate": 1.8643733724878195e-05, "loss": 0.0346, "step": 2463 }, { "epoch": 0.58, "learning_rate": 1.8642459042987052e-05, "loss": 0.067, "step": 2464 }, { "epoch": 0.58, "learning_rate": 1.864118380599245e-05, "loss": 0.0548, "step": 2465 }, { "epoch": 0.58, "learning_rate": 1.863990801397629e-05, "loss": 0.0736, "step": 2466 }, { "epoch": 0.58, "learning_rate": 1.8638631667020515e-05, "loss": 0.073, "step": 2467 }, { "epoch": 0.58, "learning_rate": 1.863735476520711e-05, "loss": 0.0383, "step": 2468 }, { "epoch": 0.58, "learning_rate": 1.8636077308618087e-05, "loss": 0.0731, "step": 2469 }, { "epoch": 0.58, "learning_rate": 1.8634799297335494e-05, "loss": 0.0778, "step": 2470 }, { "epoch": 0.58, "learning_rate": 1.8633520731441422e-05, "loss": 0.0854, "step": 2471 }, { "epoch": 0.58, "learning_rate": 1.8632241611017994e-05, "loss": 0.0723, "step": 2472 }, { "epoch": 0.58, "learning_rate": 1.863096193614736e-05, "loss": 0.0563, "step": 2473 }, { "epoch": 0.58, "learning_rate": 1.8629681706911723e-05, "loss": 0.0527, "step": 2474 }, { "epoch": 0.58, "learning_rate": 1.8628400923393302e-05, "loss": 0.1185, "step": 2475 }, { "epoch": 0.58, "learning_rate": 1.862711958567437e-05, "loss": 0.0867, "step": 2476 }, { "epoch": 0.58, "learning_rate": 1.862583769383722e-05, "loss": 0.0683, "step": 2477 }, { "epoch": 0.58, "learning_rate": 1.8624555247964197e-05, "loss": 0.0528, "step": 2478 }, { "epoch": 0.58, "learning_rate": 1.8623272248137663e-05, "loss": 0.0541, "step": 2479 }, { "epoch": 0.58, "learning_rate": 1.862198869444003e-05, "loss": 0.0639, "step": 2480 }, { "epoch": 0.58, "learning_rate": 1.862070458695374e-05, "loss": 0.1101, "step": 2481 }, { "epoch": 0.58, "learning_rate": 1.8619419925761267e-05, "loss": 0.1154, "step": 2482 }, { "epoch": 0.58, "learning_rate": 1.8618134710945127e-05, "loss": 0.079, "step": 2483 }, { "epoch": 0.58, "learning_rate": 1.861684894258787e-05, "loss": 0.1048, "step": 2484 }, { "epoch": 0.58, "learning_rate": 1.8615562620772085e-05, "loss": 0.0752, "step": 2485 }, { "epoch": 0.58, "learning_rate": 1.8614275745580383e-05, "loss": 0.0674, "step": 2486 }, { "epoch": 0.58, "learning_rate": 1.861298831709542e-05, "loss": 0.0313, "step": 2487 }, { "epoch": 0.58, "learning_rate": 1.8611700335399897e-05, "loss": 0.0878, "step": 2488 }, { "epoch": 0.58, "learning_rate": 1.861041180057653e-05, "loss": 0.0588, "step": 2489 }, { "epoch": 0.58, "learning_rate": 1.8609122712708092e-05, "loss": 0.1762, "step": 2490 }, { "epoch": 0.58, "learning_rate": 1.860783307187737e-05, "loss": 0.0527, "step": 2491 }, { "epoch": 0.58, "learning_rate": 1.860654287816721e-05, "loss": 0.0276, "step": 2492 }, { "epoch": 0.59, "learning_rate": 1.860525213166047e-05, "loss": 0.0591, "step": 2493 }, { "epoch": 0.59, "learning_rate": 1.8603960832440054e-05, "loss": 0.0363, "step": 2494 }, { "epoch": 0.59, "learning_rate": 1.860266898058891e-05, "loss": 0.0549, "step": 2495 }, { "epoch": 0.59, "learning_rate": 1.860137657619001e-05, "loss": 0.0403, "step": 2496 }, { "epoch": 0.59, "learning_rate": 1.8600083619326363e-05, "loss": 0.0283, "step": 2497 }, { "epoch": 0.59, "learning_rate": 1.8598790110081017e-05, "loss": 0.0072, "step": 2498 }, { "epoch": 0.59, "learning_rate": 1.859749604853705e-05, "loss": 0.0107, "step": 2499 }, { "epoch": 0.59, "learning_rate": 1.8596201434777586e-05, "loss": 0.0778, "step": 2500 }, { "epoch": 0.59, "learning_rate": 1.8594906268885774e-05, "loss": 0.0718, "step": 2501 }, { "epoch": 0.59, "learning_rate": 1.8593610550944803e-05, "loss": 0.0191, "step": 2502 }, { "epoch": 0.59, "learning_rate": 1.8592314281037895e-05, "loss": 0.022, "step": 2503 }, { "epoch": 0.59, "learning_rate": 1.8591017459248313e-05, "loss": 0.1267, "step": 2504 }, { "epoch": 0.59, "learning_rate": 1.8589720085659345e-05, "loss": 0.0552, "step": 2505 }, { "epoch": 0.59, "learning_rate": 1.8588422160354328e-05, "loss": 0.0528, "step": 2506 }, { "epoch": 0.59, "learning_rate": 1.8587123683416624e-05, "loss": 0.0653, "step": 2507 }, { "epoch": 0.59, "learning_rate": 1.8585824654929635e-05, "loss": 0.0994, "step": 2508 }, { "epoch": 0.59, "learning_rate": 1.85845250749768e-05, "loss": 0.0098, "step": 2509 }, { "epoch": 0.59, "learning_rate": 1.8583224943641586e-05, "loss": 0.0941, "step": 2510 }, { "epoch": 0.59, "learning_rate": 1.8581924261007502e-05, "loss": 0.138, "step": 2511 }, { "epoch": 0.59, "learning_rate": 1.8580623027158088e-05, "loss": 0.0136, "step": 2512 }, { "epoch": 0.59, "learning_rate": 1.8579321242176926e-05, "loss": 0.0557, "step": 2513 }, { "epoch": 0.59, "learning_rate": 1.857801890614763e-05, "loss": 0.0763, "step": 2514 }, { "epoch": 0.59, "learning_rate": 1.8576716019153846e-05, "loss": 0.0609, "step": 2515 }, { "epoch": 0.59, "learning_rate": 1.8575412581279258e-05, "loss": 0.0192, "step": 2516 }, { "epoch": 0.59, "learning_rate": 1.8574108592607585e-05, "loss": 0.0526, "step": 2517 }, { "epoch": 0.59, "learning_rate": 1.8572804053222587e-05, "loss": 0.1238, "step": 2518 }, { "epoch": 0.59, "learning_rate": 1.8571498963208047e-05, "loss": 0.0996, "step": 2519 }, { "epoch": 0.59, "learning_rate": 1.8570193322647797e-05, "loss": 0.0573, "step": 2520 }, { "epoch": 0.59, "learning_rate": 1.8568887131625693e-05, "loss": 0.0696, "step": 2521 }, { "epoch": 0.59, "learning_rate": 1.8567580390225636e-05, "loss": 0.1229, "step": 2522 }, { "epoch": 0.59, "learning_rate": 1.8566273098531556e-05, "loss": 0.0456, "step": 2523 }, { "epoch": 0.59, "learning_rate": 1.856496525662742e-05, "loss": 0.1163, "step": 2524 }, { "epoch": 0.59, "learning_rate": 1.856365686459723e-05, "loss": 0.0731, "step": 2525 }, { "epoch": 0.59, "learning_rate": 1.856234792252502e-05, "loss": 0.0558, "step": 2526 }, { "epoch": 0.59, "learning_rate": 1.8561038430494874e-05, "loss": 0.0875, "step": 2527 }, { "epoch": 0.59, "learning_rate": 1.8559728388590887e-05, "loss": 0.0927, "step": 2528 }, { "epoch": 0.59, "learning_rate": 1.855841779689721e-05, "loss": 0.0405, "step": 2529 }, { "epoch": 0.59, "learning_rate": 1.8557106655498024e-05, "loss": 0.1631, "step": 2530 }, { "epoch": 0.59, "learning_rate": 1.855579496447754e-05, "loss": 0.1302, "step": 2531 }, { "epoch": 0.59, "learning_rate": 1.8554482723920006e-05, "loss": 0.06, "step": 2532 }, { "epoch": 0.59, "learning_rate": 1.855316993390971e-05, "loss": 0.1279, "step": 2533 }, { "epoch": 0.59, "learning_rate": 1.8551856594530976e-05, "loss": 0.0622, "step": 2534 }, { "epoch": 0.59, "learning_rate": 1.855054270586815e-05, "loss": 0.0963, "step": 2535 }, { "epoch": 0.6, "learning_rate": 1.854922826800563e-05, "loss": 0.0652, "step": 2536 }, { "epoch": 0.6, "learning_rate": 1.8547913281027837e-05, "loss": 0.1104, "step": 2537 }, { "epoch": 0.6, "learning_rate": 1.8546597745019238e-05, "loss": 0.0982, "step": 2538 }, { "epoch": 0.6, "learning_rate": 1.8545281660064324e-05, "loss": 0.0626, "step": 2539 }, { "epoch": 0.6, "learning_rate": 1.854396502624763e-05, "loss": 0.0509, "step": 2540 }, { "epoch": 0.6, "learning_rate": 1.8542647843653726e-05, "loss": 0.0688, "step": 2541 }, { "epoch": 0.6, "learning_rate": 1.854133011236721e-05, "loss": 0.031, "step": 2542 }, { "epoch": 0.6, "learning_rate": 1.854001183247272e-05, "loss": 0.0529, "step": 2543 }, { "epoch": 0.6, "learning_rate": 1.853869300405493e-05, "loss": 0.0286, "step": 2544 }, { "epoch": 0.6, "learning_rate": 1.853737362719855e-05, "loss": 0.05, "step": 2545 }, { "epoch": 0.6, "learning_rate": 1.8536053701988318e-05, "loss": 0.0633, "step": 2546 }, { "epoch": 0.6, "learning_rate": 1.853473322850902e-05, "loss": 0.069, "step": 2547 }, { "epoch": 0.6, "learning_rate": 1.8533412206845457e-05, "loss": 0.02, "step": 2548 }, { "epoch": 0.6, "learning_rate": 1.8532090637082493e-05, "loss": 0.0431, "step": 2549 }, { "epoch": 0.6, "learning_rate": 1.8530768519305005e-05, "loss": 0.0326, "step": 2550 }, { "epoch": 0.6, "learning_rate": 1.8529445853597912e-05, "loss": 0.0475, "step": 2551 }, { "epoch": 0.6, "learning_rate": 1.8528122640046172e-05, "loss": 0.0554, "step": 2552 }, { "epoch": 0.6, "learning_rate": 1.852679887873477e-05, "loss": 0.081, "step": 2553 }, { "epoch": 0.6, "learning_rate": 1.8525474569748733e-05, "loss": 0.0833, "step": 2554 }, { "epoch": 0.6, "learning_rate": 1.8524149713173117e-05, "loss": 0.0396, "step": 2555 }, { "epoch": 0.6, "learning_rate": 1.852282430909303e-05, "loss": 0.0609, "step": 2556 }, { "epoch": 0.6, "learning_rate": 1.8521498357593587e-05, "loss": 0.0459, "step": 2557 }, { "epoch": 0.6, "learning_rate": 1.8520171858759966e-05, "loss": 0.0783, "step": 2558 }, { "epoch": 0.6, "learning_rate": 1.8518844812677355e-05, "loss": 0.0222, "step": 2559 }, { "epoch": 0.6, "learning_rate": 1.8517517219431003e-05, "loss": 0.0534, "step": 2560 }, { "epoch": 0.6, "learning_rate": 1.8516189079106176e-05, "loss": 0.0348, "step": 2561 }, { "epoch": 0.6, "learning_rate": 1.851486039178818e-05, "loss": 0.0944, "step": 2562 }, { "epoch": 0.6, "learning_rate": 1.851353115756235e-05, "loss": 0.0185, "step": 2563 }, { "epoch": 0.6, "learning_rate": 1.8512201376514075e-05, "loss": 0.1406, "step": 2564 }, { "epoch": 0.6, "learning_rate": 1.8510871048728755e-05, "loss": 0.2703, "step": 2565 }, { "epoch": 0.6, "learning_rate": 1.8509540174291848e-05, "loss": 0.0957, "step": 2566 }, { "epoch": 0.6, "learning_rate": 1.850820875328882e-05, "loss": 0.0982, "step": 2567 }, { "epoch": 0.6, "learning_rate": 1.8506876785805206e-05, "loss": 0.0781, "step": 2568 }, { "epoch": 0.6, "learning_rate": 1.8505544271926546e-05, "loss": 0.1042, "step": 2569 }, { "epoch": 0.6, "learning_rate": 1.850421121173843e-05, "loss": 0.07, "step": 2570 }, { "epoch": 0.6, "learning_rate": 1.850287760532648e-05, "loss": 0.0808, "step": 2571 }, { "epoch": 0.6, "learning_rate": 1.8501543452776353e-05, "loss": 0.0444, "step": 2572 }, { "epoch": 0.6, "learning_rate": 1.8500208754173746e-05, "loss": 0.0155, "step": 2573 }, { "epoch": 0.6, "learning_rate": 1.849887350960438e-05, "loss": 0.0846, "step": 2574 }, { "epoch": 0.6, "learning_rate": 1.8497537719154023e-05, "loss": 0.1103, "step": 2575 }, { "epoch": 0.6, "learning_rate": 1.8496201382908464e-05, "loss": 0.0318, "step": 2576 }, { "epoch": 0.6, "learning_rate": 1.8494864500953547e-05, "loss": 0.0724, "step": 2577 }, { "epoch": 0.61, "learning_rate": 1.8493527073375133e-05, "loss": 0.1219, "step": 2578 }, { "epoch": 0.61, "learning_rate": 1.8492189100259125e-05, "loss": 0.0086, "step": 2579 }, { "epoch": 0.61, "learning_rate": 1.849085058169146e-05, "loss": 0.0329, "step": 2580 }, { "epoch": 0.61, "learning_rate": 1.848951151775811e-05, "loss": 0.1196, "step": 2581 }, { "epoch": 0.61, "learning_rate": 1.848817190854509e-05, "loss": 0.0342, "step": 2582 }, { "epoch": 0.61, "learning_rate": 1.848683175413844e-05, "loss": 0.1005, "step": 2583 }, { "epoch": 0.61, "learning_rate": 1.8485491054624233e-05, "loss": 0.0243, "step": 2584 }, { "epoch": 0.61, "learning_rate": 1.8484149810088584e-05, "loss": 0.0663, "step": 2585 }, { "epoch": 0.61, "learning_rate": 1.8482808020617643e-05, "loss": 0.0969, "step": 2586 }, { "epoch": 0.61, "learning_rate": 1.8481465686297587e-05, "loss": 0.0317, "step": 2587 }, { "epoch": 0.61, "learning_rate": 1.8480122807214643e-05, "loss": 0.0761, "step": 2588 }, { "epoch": 0.61, "learning_rate": 1.847877938345506e-05, "loss": 0.1187, "step": 2589 }, { "epoch": 0.61, "learning_rate": 1.847743541510512e-05, "loss": 0.1306, "step": 2590 }, { "epoch": 0.61, "learning_rate": 1.8476090902251154e-05, "loss": 0.1203, "step": 2591 }, { "epoch": 0.61, "learning_rate": 1.8474745844979517e-05, "loss": 0.0647, "step": 2592 }, { "epoch": 0.61, "learning_rate": 1.84734002433766e-05, "loss": 0.0295, "step": 2593 }, { "epoch": 0.61, "learning_rate": 1.847205409752883e-05, "loss": 0.0489, "step": 2594 }, { "epoch": 0.61, "learning_rate": 1.8470707407522677e-05, "loss": 0.0397, "step": 2595 }, { "epoch": 0.61, "learning_rate": 1.8469360173444633e-05, "loss": 0.0706, "step": 2596 }, { "epoch": 0.61, "learning_rate": 1.8468012395381227e-05, "loss": 0.0387, "step": 2597 }, { "epoch": 0.61, "learning_rate": 1.8466664073419032e-05, "loss": 0.0521, "step": 2598 }, { "epoch": 0.61, "learning_rate": 1.846531520764465e-05, "loss": 0.0461, "step": 2599 }, { "epoch": 0.61, "learning_rate": 1.846396579814472e-05, "loss": 0.0243, "step": 2600 }, { "epoch": 0.61, "learning_rate": 1.846261584500591e-05, "loss": 0.0594, "step": 2601 }, { "epoch": 0.61, "learning_rate": 1.8461265348314927e-05, "loss": 0.1067, "step": 2602 }, { "epoch": 0.61, "learning_rate": 1.8459914308158512e-05, "loss": 0.0582, "step": 2603 }, { "epoch": 0.61, "learning_rate": 1.845856272462345e-05, "loss": 0.1007, "step": 2604 }, { "epoch": 0.61, "learning_rate": 1.845721059779655e-05, "loss": 0.141, "step": 2605 }, { "epoch": 0.61, "learning_rate": 1.8455857927764653e-05, "loss": 0.0344, "step": 2606 }, { "epoch": 0.61, "learning_rate": 1.8454504714614645e-05, "loss": 0.0619, "step": 2607 }, { "epoch": 0.61, "learning_rate": 1.8453150958433445e-05, "loss": 0.1134, "step": 2608 }, { "epoch": 0.61, "learning_rate": 1.8451796659307997e-05, "loss": 0.071, "step": 2609 }, { "epoch": 0.61, "learning_rate": 1.8450441817325297e-05, "loss": 0.0786, "step": 2610 }, { "epoch": 0.61, "learning_rate": 1.8449086432572358e-05, "loss": 0.1208, "step": 2611 }, { "epoch": 0.61, "learning_rate": 1.844773050513624e-05, "loss": 0.0692, "step": 2612 }, { "epoch": 0.61, "learning_rate": 1.8446374035104036e-05, "loss": 0.081, "step": 2613 }, { "epoch": 0.61, "learning_rate": 1.8445017022562868e-05, "loss": 0.059, "step": 2614 }, { "epoch": 0.61, "learning_rate": 1.8443659467599896e-05, "loss": 0.1409, "step": 2615 }, { "epoch": 0.61, "learning_rate": 1.8442301370302317e-05, "loss": 0.016, "step": 2616 }, { "epoch": 0.61, "learning_rate": 1.8440942730757367e-05, "loss": 0.0395, "step": 2617 }, { "epoch": 0.61, "learning_rate": 1.8439583549052302e-05, "loss": 0.0615, "step": 2618 }, { "epoch": 0.61, "learning_rate": 1.8438223825274425e-05, "loss": 0.0369, "step": 2619 }, { "epoch": 0.61, "learning_rate": 1.8436863559511074e-05, "loss": 0.0618, "step": 2620 }, { "epoch": 0.62, "learning_rate": 1.8435502751849614e-05, "loss": 0.0817, "step": 2621 }, { "epoch": 0.62, "learning_rate": 1.843414140237745e-05, "loss": 0.0213, "step": 2622 }, { "epoch": 0.62, "learning_rate": 1.8432779511182032e-05, "loss": 0.1398, "step": 2623 }, { "epoch": 0.62, "learning_rate": 1.8431417078350816e-05, "loss": 0.1178, "step": 2624 }, { "epoch": 0.62, "learning_rate": 1.843005410397132e-05, "loss": 0.1606, "step": 2625 }, { "epoch": 0.62, "learning_rate": 1.842869058813109e-05, "loss": 0.0699, "step": 2626 }, { "epoch": 0.62, "learning_rate": 1.8427326530917703e-05, "loss": 0.0634, "step": 2627 }, { "epoch": 0.62, "learning_rate": 1.8425961932418767e-05, "loss": 0.063, "step": 2628 }, { "epoch": 0.62, "learning_rate": 1.8424596792721935e-05, "loss": 0.0178, "step": 2629 }, { "epoch": 0.62, "learning_rate": 1.842323111191489e-05, "loss": 0.0382, "step": 2630 }, { "epoch": 0.62, "learning_rate": 1.8421864890085347e-05, "loss": 0.0873, "step": 2631 }, { "epoch": 0.62, "learning_rate": 1.8420498127321058e-05, "loss": 0.0461, "step": 2632 }, { "epoch": 0.62, "learning_rate": 1.8419130823709814e-05, "loss": 0.0755, "step": 2633 }, { "epoch": 0.62, "learning_rate": 1.8417762979339427e-05, "loss": 0.0411, "step": 2634 }, { "epoch": 0.62, "learning_rate": 1.8416394594297763e-05, "loss": 0.074, "step": 2635 }, { "epoch": 0.62, "learning_rate": 1.8415025668672712e-05, "loss": 0.0266, "step": 2636 }, { "epoch": 0.62, "learning_rate": 1.8413656202552194e-05, "loss": 0.0349, "step": 2637 }, { "epoch": 0.62, "learning_rate": 1.8412286196024175e-05, "loss": 0.0992, "step": 2638 }, { "epoch": 0.62, "learning_rate": 1.841091564917665e-05, "loss": 0.0118, "step": 2639 }, { "epoch": 0.62, "learning_rate": 1.8409544562097648e-05, "loss": 0.1112, "step": 2640 }, { "epoch": 0.62, "learning_rate": 1.840817293487523e-05, "loss": 0.0714, "step": 2641 }, { "epoch": 0.62, "learning_rate": 1.84068007675975e-05, "loss": 0.0309, "step": 2642 }, { "epoch": 0.62, "learning_rate": 1.840542806035259e-05, "loss": 0.0807, "step": 2643 }, { "epoch": 0.62, "learning_rate": 1.8404054813228672e-05, "loss": 0.0554, "step": 2644 }, { "epoch": 0.62, "learning_rate": 1.8402681026313943e-05, "loss": 0.0772, "step": 2645 }, { "epoch": 0.62, "learning_rate": 1.8401306699696647e-05, "loss": 0.098, "step": 2646 }, { "epoch": 0.62, "learning_rate": 1.8399931833465053e-05, "loss": 0.0424, "step": 2647 }, { "epoch": 0.62, "learning_rate": 1.839855642770747e-05, "loss": 0.0326, "step": 2648 }, { "epoch": 0.62, "learning_rate": 1.839718048251224e-05, "loss": 0.0793, "step": 2649 }, { "epoch": 0.62, "learning_rate": 1.839580399796774e-05, "loss": 0.0539, "step": 2650 }, { "epoch": 0.62, "learning_rate": 1.839442697416238e-05, "loss": 0.0302, "step": 2651 }, { "epoch": 0.62, "learning_rate": 1.8393049411184608e-05, "loss": 0.0243, "step": 2652 }, { "epoch": 0.62, "learning_rate": 1.83916713091229e-05, "loss": 0.0834, "step": 2653 }, { "epoch": 0.62, "learning_rate": 1.839029266806578e-05, "loss": 0.109, "step": 2654 }, { "epoch": 0.62, "learning_rate": 1.838891348810179e-05, "loss": 0.063, "step": 2655 }, { "epoch": 0.62, "learning_rate": 1.8387533769319517e-05, "loss": 0.1313, "step": 2656 }, { "epoch": 0.62, "learning_rate": 1.838615351180758e-05, "loss": 0.043, "step": 2657 }, { "epoch": 0.62, "learning_rate": 1.8384772715654634e-05, "loss": 0.0238, "step": 2658 }, { "epoch": 0.62, "learning_rate": 1.8383391380949364e-05, "loss": 0.0162, "step": 2659 }, { "epoch": 0.62, "learning_rate": 1.83820095077805e-05, "loss": 0.0392, "step": 2660 }, { "epoch": 0.62, "learning_rate": 1.838062709623679e-05, "loss": 0.0501, "step": 2661 }, { "epoch": 0.62, "learning_rate": 1.8379244146407028e-05, "loss": 0.0446, "step": 2662 }, { "epoch": 0.62, "learning_rate": 1.8377860658380044e-05, "loss": 0.0203, "step": 2663 }, { "epoch": 0.63, "learning_rate": 1.83764766322447e-05, "loss": 0.0369, "step": 2664 }, { "epoch": 0.63, "learning_rate": 1.837509206808989e-05, "loss": 0.0511, "step": 2665 }, { "epoch": 0.63, "learning_rate": 1.8373706966004542e-05, "loss": 0.0462, "step": 2666 }, { "epoch": 0.63, "learning_rate": 1.837232132607763e-05, "loss": 0.0307, "step": 2667 }, { "epoch": 0.63, "learning_rate": 1.837093514839814e-05, "loss": 0.0525, "step": 2668 }, { "epoch": 0.63, "learning_rate": 1.8369548433055114e-05, "loss": 0.0602, "step": 2669 }, { "epoch": 0.63, "learning_rate": 1.8368161180137617e-05, "loss": 0.0372, "step": 2670 }, { "epoch": 0.63, "learning_rate": 1.8366773389734754e-05, "loss": 0.055, "step": 2671 }, { "epoch": 0.63, "learning_rate": 1.8365385061935663e-05, "loss": 0.0927, "step": 2672 }, { "epoch": 0.63, "learning_rate": 1.8363996196829513e-05, "loss": 0.1195, "step": 2673 }, { "epoch": 0.63, "learning_rate": 1.8362606794505516e-05, "loss": 0.0585, "step": 2674 }, { "epoch": 0.63, "learning_rate": 1.836121685505291e-05, "loss": 0.1135, "step": 2675 }, { "epoch": 0.63, "learning_rate": 1.835982637856097e-05, "loss": 0.0744, "step": 2676 }, { "epoch": 0.63, "learning_rate": 1.8358435365119006e-05, "loss": 0.0459, "step": 2677 }, { "epoch": 0.63, "learning_rate": 1.8357043814816362e-05, "loss": 0.0406, "step": 2678 }, { "epoch": 0.63, "learning_rate": 1.8355651727742418e-05, "loss": 0.0637, "step": 2679 }, { "epoch": 0.63, "learning_rate": 1.835425910398659e-05, "loss": 0.0525, "step": 2680 }, { "epoch": 0.63, "learning_rate": 1.835286594363832e-05, "loss": 0.1075, "step": 2681 }, { "epoch": 0.63, "learning_rate": 1.8351472246787097e-05, "loss": 0.0625, "step": 2682 }, { "epoch": 0.63, "learning_rate": 1.835007801352243e-05, "loss": 0.1055, "step": 2683 }, { "epoch": 0.63, "learning_rate": 1.8348683243933877e-05, "loss": 0.0427, "step": 2684 }, { "epoch": 0.63, "learning_rate": 1.8347287938111026e-05, "loss": 0.107, "step": 2685 }, { "epoch": 0.63, "learning_rate": 1.8345892096143486e-05, "loss": 0.0405, "step": 2686 }, { "epoch": 0.63, "learning_rate": 1.8344495718120923e-05, "loss": 0.042, "step": 2687 }, { "epoch": 0.63, "learning_rate": 1.834309880413302e-05, "loss": 0.0531, "step": 2688 }, { "epoch": 0.63, "learning_rate": 1.8341701354269504e-05, "loss": 0.1142, "step": 2689 }, { "epoch": 0.63, "learning_rate": 1.8340303368620132e-05, "loss": 0.045, "step": 2690 }, { "epoch": 0.63, "learning_rate": 1.8338904847274693e-05, "loss": 0.0133, "step": 2691 }, { "epoch": 0.63, "learning_rate": 1.8337505790323016e-05, "loss": 0.0329, "step": 2692 }, { "epoch": 0.63, "learning_rate": 1.8336106197854965e-05, "loss": 0.0632, "step": 2693 }, { "epoch": 0.63, "learning_rate": 1.833470606996043e-05, "loss": 0.0278, "step": 2694 }, { "epoch": 0.63, "learning_rate": 1.8333305406729348e-05, "loss": 0.1683, "step": 2695 }, { "epoch": 0.63, "learning_rate": 1.8331904208251677e-05, "loss": 0.1092, "step": 2696 }, { "epoch": 0.63, "learning_rate": 1.8330502474617418e-05, "loss": 0.0512, "step": 2697 }, { "epoch": 0.63, "learning_rate": 1.8329100205916603e-05, "loss": 0.0733, "step": 2698 }, { "epoch": 0.63, "learning_rate": 1.8327697402239303e-05, "loss": 0.1226, "step": 2699 }, { "epoch": 0.63, "learning_rate": 1.8326294063675615e-05, "loss": 0.1302, "step": 2700 }, { "epoch": 0.63, "learning_rate": 1.832489019031568e-05, "loss": 0.0298, "step": 2701 }, { "epoch": 0.63, "learning_rate": 1.8323485782249662e-05, "loss": 0.0638, "step": 2702 }, { "epoch": 0.63, "learning_rate": 1.8322080839567772e-05, "loss": 0.0173, "step": 2703 }, { "epoch": 0.63, "learning_rate": 1.8320675362360248e-05, "loss": 0.1129, "step": 2704 }, { "epoch": 0.63, "learning_rate": 1.831926935071736e-05, "loss": 0.0157, "step": 2705 }, { "epoch": 0.64, "learning_rate": 1.831786280472942e-05, "loss": 0.0274, "step": 2706 }, { "epoch": 0.64, "learning_rate": 1.8316455724486772e-05, "loss": 0.1267, "step": 2707 }, { "epoch": 0.64, "learning_rate": 1.8315048110079786e-05, "loss": 0.0281, "step": 2708 }, { "epoch": 0.64, "learning_rate": 1.8313639961598873e-05, "loss": 0.0466, "step": 2709 }, { "epoch": 0.64, "learning_rate": 1.8312231279134485e-05, "loss": 0.0484, "step": 2710 }, { "epoch": 0.64, "learning_rate": 1.8310822062777096e-05, "loss": 0.152, "step": 2711 }, { "epoch": 0.64, "learning_rate": 1.8309412312617222e-05, "loss": 0.094, "step": 2712 }, { "epoch": 0.64, "learning_rate": 1.830800202874541e-05, "loss": 0.0365, "step": 2713 }, { "epoch": 0.64, "learning_rate": 1.830659121125224e-05, "loss": 0.0975, "step": 2714 }, { "epoch": 0.64, "learning_rate": 1.8305179860228333e-05, "loss": 0.097, "step": 2715 }, { "epoch": 0.64, "learning_rate": 1.8303767975764336e-05, "loss": 0.0511, "step": 2716 }, { "epoch": 0.64, "learning_rate": 1.8302355557950936e-05, "loss": 0.029, "step": 2717 }, { "epoch": 0.64, "learning_rate": 1.8300942606878854e-05, "loss": 0.1209, "step": 2718 }, { "epoch": 0.64, "learning_rate": 1.829952912263884e-05, "loss": 0.1317, "step": 2719 }, { "epoch": 0.64, "learning_rate": 1.8298115105321684e-05, "loss": 0.0461, "step": 2720 }, { "epoch": 0.64, "learning_rate": 1.8296700555018207e-05, "loss": 0.0652, "step": 2721 }, { "epoch": 0.64, "learning_rate": 1.8295285471819266e-05, "loss": 0.0766, "step": 2722 }, { "epoch": 0.64, "learning_rate": 1.829386985581575e-05, "loss": 0.0613, "step": 2723 }, { "epoch": 0.64, "learning_rate": 1.8292453707098588e-05, "loss": 0.0513, "step": 2724 }, { "epoch": 0.64, "learning_rate": 1.8291037025758736e-05, "loss": 0.0718, "step": 2725 }, { "epoch": 0.64, "learning_rate": 1.8289619811887186e-05, "loss": 0.028, "step": 2726 }, { "epoch": 0.64, "learning_rate": 1.8288202065574964e-05, "loss": 0.1206, "step": 2727 }, { "epoch": 0.64, "learning_rate": 1.8286783786913136e-05, "loss": 0.1014, "step": 2728 }, { "epoch": 0.64, "learning_rate": 1.82853649759928e-05, "loss": 0.1415, "step": 2729 }, { "epoch": 0.64, "learning_rate": 1.8283945632905074e-05, "loss": 0.1173, "step": 2730 }, { "epoch": 0.64, "learning_rate": 1.8282525757741134e-05, "loss": 0.1219, "step": 2731 }, { "epoch": 0.64, "learning_rate": 1.8281105350592175e-05, "loss": 0.0875, "step": 2732 }, { "epoch": 0.64, "learning_rate": 1.8279684411549426e-05, "loss": 0.0443, "step": 2733 }, { "epoch": 0.64, "learning_rate": 1.8278262940704163e-05, "loss": 0.0778, "step": 2734 }, { "epoch": 0.64, "learning_rate": 1.8276840938147672e-05, "loss": 0.0923, "step": 2735 }, { "epoch": 0.64, "learning_rate": 1.82754184039713e-05, "loss": 0.1443, "step": 2736 }, { "epoch": 0.64, "learning_rate": 1.8273995338266414e-05, "loss": 0.0184, "step": 2737 }, { "epoch": 0.64, "learning_rate": 1.8272571741124416e-05, "loss": 0.0435, "step": 2738 }, { "epoch": 0.64, "learning_rate": 1.8271147612636736e-05, "loss": 0.1086, "step": 2739 }, { "epoch": 0.64, "learning_rate": 1.8269722952894856e-05, "loss": 0.0427, "step": 2740 }, { "epoch": 0.64, "learning_rate": 1.8268297761990278e-05, "loss": 0.0583, "step": 2741 }, { "epoch": 0.64, "learning_rate": 1.8266872040014544e-05, "loss": 0.0427, "step": 2742 }, { "epoch": 0.64, "learning_rate": 1.8265445787059223e-05, "loss": 0.1733, "step": 2743 }, { "epoch": 0.64, "learning_rate": 1.826401900321593e-05, "loss": 0.0462, "step": 2744 }, { "epoch": 0.64, "learning_rate": 1.8262591688576294e-05, "loss": 0.0576, "step": 2745 }, { "epoch": 0.64, "learning_rate": 1.8261163843232005e-05, "loss": 0.1158, "step": 2746 }, { "epoch": 0.64, "learning_rate": 1.8259735467274765e-05, "loss": 0.0297, "step": 2747 }, { "epoch": 0.64, "learning_rate": 1.8258306560796324e-05, "loss": 0.108, "step": 2748 }, { "epoch": 0.65, "learning_rate": 1.8256877123888457e-05, "loss": 0.0726, "step": 2749 }, { "epoch": 0.65, "learning_rate": 1.8255447156642975e-05, "loss": 0.1183, "step": 2750 }, { "epoch": 0.65, "learning_rate": 1.825401665915173e-05, "loss": 0.0767, "step": 2751 }, { "epoch": 0.65, "learning_rate": 1.8252585631506597e-05, "loss": 0.0703, "step": 2752 }, { "epoch": 0.65, "learning_rate": 1.8251154073799494e-05, "loss": 0.0895, "step": 2753 }, { "epoch": 0.65, "learning_rate": 1.8249721986122367e-05, "loss": 0.0978, "step": 2754 }, { "epoch": 0.65, "learning_rate": 1.82482893685672e-05, "loss": 0.0505, "step": 2755 }, { "epoch": 0.65, "learning_rate": 1.824685622122601e-05, "loss": 0.0463, "step": 2756 }, { "epoch": 0.65, "learning_rate": 1.8245422544190852e-05, "loss": 0.0285, "step": 2757 }, { "epoch": 0.65, "learning_rate": 1.82439883375538e-05, "loss": 0.112, "step": 2758 }, { "epoch": 0.65, "learning_rate": 1.8242553601406984e-05, "loss": 0.039, "step": 2759 }, { "epoch": 0.65, "learning_rate": 1.824111833584255e-05, "loss": 0.0175, "step": 2760 }, { "epoch": 0.65, "learning_rate": 1.8239682540952684e-05, "loss": 0.0593, "step": 2761 }, { "epoch": 0.65, "learning_rate": 1.8238246216829613e-05, "loss": 0.0579, "step": 2762 }, { "epoch": 0.65, "learning_rate": 1.823680936356559e-05, "loss": 0.1409, "step": 2763 }, { "epoch": 0.65, "learning_rate": 1.82353719812529e-05, "loss": 0.0454, "step": 2764 }, { "epoch": 0.65, "learning_rate": 1.823393406998387e-05, "loss": 0.068, "step": 2765 }, { "epoch": 0.65, "learning_rate": 1.8232495629850855e-05, "loss": 0.0299, "step": 2766 }, { "epoch": 0.65, "learning_rate": 1.8231056660946245e-05, "loss": 0.0384, "step": 2767 }, { "epoch": 0.65, "learning_rate": 1.8229617163362463e-05, "loss": 0.0645, "step": 2768 }, { "epoch": 0.65, "learning_rate": 1.822817713719197e-05, "loss": 0.0553, "step": 2769 }, { "epoch": 0.65, "learning_rate": 1.8226736582527263e-05, "loss": 0.085, "step": 2770 }, { "epoch": 0.65, "learning_rate": 1.822529549946086e-05, "loss": 0.0658, "step": 2771 }, { "epoch": 0.65, "learning_rate": 1.822385388808533e-05, "loss": 0.017, "step": 2772 }, { "epoch": 0.65, "learning_rate": 1.8222411748493258e-05, "loss": 0.0499, "step": 2773 }, { "epoch": 0.65, "learning_rate": 1.822096908077728e-05, "loss": 0.0473, "step": 2774 }, { "epoch": 0.65, "learning_rate": 1.8219525885030055e-05, "loss": 0.0289, "step": 2775 }, { "epoch": 0.65, "learning_rate": 1.821808216134428e-05, "loss": 0.0102, "step": 2776 }, { "epoch": 0.65, "learning_rate": 1.8216637909812684e-05, "loss": 0.0727, "step": 2777 }, { "epoch": 0.65, "learning_rate": 1.8215193130528034e-05, "loss": 0.0615, "step": 2778 }, { "epoch": 0.65, "learning_rate": 1.8213747823583125e-05, "loss": 0.0169, "step": 2779 }, { "epoch": 0.65, "learning_rate": 1.821230198907079e-05, "loss": 0.0495, "step": 2780 }, { "epoch": 0.65, "learning_rate": 1.8210855627083894e-05, "loss": 0.0214, "step": 2781 }, { "epoch": 0.65, "learning_rate": 1.8209408737715337e-05, "loss": 0.1556, "step": 2782 }, { "epoch": 0.65, "learning_rate": 1.820796132105805e-05, "loss": 0.1271, "step": 2783 }, { "epoch": 0.65, "learning_rate": 1.8206513377205003e-05, "loss": 0.174, "step": 2784 }, { "epoch": 0.65, "learning_rate": 1.8205064906249198e-05, "loss": 0.1141, "step": 2785 }, { "epoch": 0.65, "learning_rate": 1.820361590828367e-05, "loss": 0.055, "step": 2786 }, { "epoch": 0.65, "learning_rate": 1.820216638340148e-05, "loss": 0.0983, "step": 2787 }, { "epoch": 0.65, "learning_rate": 1.8200716331695748e-05, "loss": 0.0475, "step": 2788 }, { "epoch": 0.65, "learning_rate": 1.819926575325959e-05, "loss": 0.0979, "step": 2789 }, { "epoch": 0.65, "learning_rate": 1.819781464818619e-05, "loss": 0.073, "step": 2790 }, { "epoch": 0.66, "learning_rate": 1.8196363016568753e-05, "loss": 0.0728, "step": 2791 }, { "epoch": 0.66, "learning_rate": 1.8194910858500508e-05, "loss": 0.0648, "step": 2792 }, { "epoch": 0.66, "learning_rate": 1.8193458174074733e-05, "loss": 0.0419, "step": 2793 }, { "epoch": 0.66, "learning_rate": 1.819200496338473e-05, "loss": 0.0718, "step": 2794 }, { "epoch": 0.66, "learning_rate": 1.8190551226523838e-05, "loss": 0.0485, "step": 2795 }, { "epoch": 0.66, "learning_rate": 1.8189096963585434e-05, "loss": 0.0281, "step": 2796 }, { "epoch": 0.66, "learning_rate": 1.8187642174662924e-05, "loss": 0.0739, "step": 2797 }, { "epoch": 0.66, "learning_rate": 1.818618685984975e-05, "loss": 0.0717, "step": 2798 }, { "epoch": 0.66, "learning_rate": 1.8184731019239385e-05, "loss": 0.0529, "step": 2799 }, { "epoch": 0.66, "learning_rate": 1.8183274652925337e-05, "loss": 0.0398, "step": 2800 }, { "epoch": 0.66, "learning_rate": 1.8181817761001145e-05, "loss": 0.0342, "step": 2801 }, { "epoch": 0.66, "learning_rate": 1.8180360343560392e-05, "loss": 0.0331, "step": 2802 }, { "epoch": 0.66, "learning_rate": 1.8178902400696683e-05, "loss": 0.112, "step": 2803 }, { "epoch": 0.66, "learning_rate": 1.817744393250366e-05, "loss": 0.0325, "step": 2804 }, { "epoch": 0.66, "learning_rate": 1.8175984939075006e-05, "loss": 0.0924, "step": 2805 }, { "epoch": 0.66, "learning_rate": 1.8174525420504426e-05, "loss": 0.0459, "step": 2806 }, { "epoch": 0.66, "learning_rate": 1.817306537688567e-05, "loss": 0.0575, "step": 2807 }, { "epoch": 0.66, "learning_rate": 1.817160480831251e-05, "loss": 0.1075, "step": 2808 }, { "epoch": 0.66, "learning_rate": 1.8170143714878764e-05, "loss": 0.0421, "step": 2809 }, { "epoch": 0.66, "learning_rate": 1.8168682096678272e-05, "loss": 0.029, "step": 2810 }, { "epoch": 0.66, "learning_rate": 1.8167219953804916e-05, "loss": 0.0475, "step": 2811 }, { "epoch": 0.66, "learning_rate": 1.8165757286352614e-05, "loss": 0.0399, "step": 2812 }, { "epoch": 0.66, "learning_rate": 1.8164294094415305e-05, "loss": 0.0656, "step": 2813 }, { "epoch": 0.66, "learning_rate": 1.8162830378086977e-05, "loss": 0.0503, "step": 2814 }, { "epoch": 0.66, "learning_rate": 1.8161366137461635e-05, "loss": 0.0526, "step": 2815 }, { "epoch": 0.66, "learning_rate": 1.8159901372633336e-05, "loss": 0.0559, "step": 2816 }, { "epoch": 0.66, "learning_rate": 1.8158436083696156e-05, "loss": 0.1103, "step": 2817 }, { "epoch": 0.66, "learning_rate": 1.8156970270744212e-05, "loss": 0.0989, "step": 2818 }, { "epoch": 0.66, "learning_rate": 1.8155503933871653e-05, "loss": 0.0233, "step": 2819 }, { "epoch": 0.66, "learning_rate": 1.815403707317266e-05, "loss": 0.1429, "step": 2820 }, { "epoch": 0.66, "learning_rate": 1.8152569688741454e-05, "loss": 0.1231, "step": 2821 }, { "epoch": 0.66, "learning_rate": 1.815110178067228e-05, "loss": 0.0623, "step": 2822 }, { "epoch": 0.66, "learning_rate": 1.8149633349059418e-05, "loss": 0.0878, "step": 2823 }, { "epoch": 0.66, "learning_rate": 1.8148164393997194e-05, "loss": 0.0665, "step": 2824 }, { "epoch": 0.66, "learning_rate": 1.8146694915579953e-05, "loss": 0.0478, "step": 2825 }, { "epoch": 0.66, "learning_rate": 1.8145224913902085e-05, "loss": 0.0509, "step": 2826 }, { "epoch": 0.66, "learning_rate": 1.8143754389058e-05, "loss": 0.0454, "step": 2827 }, { "epoch": 0.66, "learning_rate": 1.8142283341142155e-05, "loss": 0.0798, "step": 2828 }, { "epoch": 0.66, "learning_rate": 1.8140811770249032e-05, "loss": 0.0703, "step": 2829 }, { "epoch": 0.66, "learning_rate": 1.813933967647315e-05, "loss": 0.0447, "step": 2830 }, { "epoch": 0.66, "learning_rate": 1.8137867059909066e-05, "loss": 0.0789, "step": 2831 }, { "epoch": 0.66, "learning_rate": 1.8136393920651358e-05, "loss": 0.0162, "step": 2832 }, { "epoch": 0.66, "learning_rate": 1.8134920258794653e-05, "loss": 0.1051, "step": 2833 }, { "epoch": 0.67, "learning_rate": 1.81334460744336e-05, "loss": 0.0249, "step": 2834 }, { "epoch": 0.67, "learning_rate": 1.8131971367662887e-05, "loss": 0.0826, "step": 2835 }, { "epoch": 0.67, "learning_rate": 1.8130496138577234e-05, "loss": 0.05, "step": 2836 }, { "epoch": 0.67, "learning_rate": 1.8129020387271392e-05, "loss": 0.0538, "step": 2837 }, { "epoch": 0.67, "learning_rate": 1.812754411384015e-05, "loss": 0.0447, "step": 2838 }, { "epoch": 0.67, "learning_rate": 1.8126067318378332e-05, "loss": 0.039, "step": 2839 }, { "epoch": 0.67, "learning_rate": 1.8124590000980785e-05, "loss": 0.0837, "step": 2840 }, { "epoch": 0.67, "learning_rate": 1.8123112161742407e-05, "loss": 0.1848, "step": 2841 }, { "epoch": 0.67, "learning_rate": 1.812163380075811e-05, "loss": 0.1112, "step": 2842 }, { "epoch": 0.67, "learning_rate": 1.812015491812285e-05, "loss": 0.1151, "step": 2843 }, { "epoch": 0.67, "learning_rate": 1.811867551393162e-05, "loss": 0.0383, "step": 2844 }, { "epoch": 0.67, "learning_rate": 1.8117195588279442e-05, "loss": 0.0569, "step": 2845 }, { "epoch": 0.67, "learning_rate": 1.8115715141261366e-05, "loss": 0.0204, "step": 2846 }, { "epoch": 0.67, "learning_rate": 1.8114234172972486e-05, "loss": 0.037, "step": 2847 }, { "epoch": 0.67, "learning_rate": 1.811275268350792e-05, "loss": 0.0826, "step": 2848 }, { "epoch": 0.67, "learning_rate": 1.811127067296283e-05, "loss": 0.0929, "step": 2849 }, { "epoch": 0.67, "learning_rate": 1.8109788141432396e-05, "loss": 0.0399, "step": 2850 }, { "epoch": 0.67, "learning_rate": 1.8108305089011848e-05, "loss": 0.0872, "step": 2851 }, { "epoch": 0.67, "learning_rate": 1.810682151579644e-05, "loss": 0.0466, "step": 2852 }, { "epoch": 0.67, "learning_rate": 1.810533742188146e-05, "loss": 0.0526, "step": 2853 }, { "epoch": 0.67, "learning_rate": 1.8103852807362234e-05, "loss": 0.057, "step": 2854 }, { "epoch": 0.67, "learning_rate": 1.8102367672334117e-05, "loss": 0.0371, "step": 2855 }, { "epoch": 0.67, "learning_rate": 1.81008820168925e-05, "loss": 0.1472, "step": 2856 }, { "epoch": 0.67, "learning_rate": 1.80993958411328e-05, "loss": 0.0388, "step": 2857 }, { "epoch": 0.67, "learning_rate": 1.8097909145150486e-05, "loss": 0.0726, "step": 2858 }, { "epoch": 0.67, "learning_rate": 1.809642192904104e-05, "loss": 0.0273, "step": 2859 }, { "epoch": 0.67, "learning_rate": 1.8094934192899988e-05, "loss": 0.0666, "step": 2860 }, { "epoch": 0.67, "learning_rate": 1.8093445936822883e-05, "loss": 0.0704, "step": 2861 }, { "epoch": 0.67, "learning_rate": 1.8091957160905317e-05, "loss": 0.0465, "step": 2862 }, { "epoch": 0.67, "learning_rate": 1.809046786524292e-05, "loss": 0.1577, "step": 2863 }, { "epoch": 0.67, "learning_rate": 1.808897804993134e-05, "loss": 0.0521, "step": 2864 }, { "epoch": 0.67, "learning_rate": 1.8087487715066275e-05, "loss": 0.0551, "step": 2865 }, { "epoch": 0.67, "learning_rate": 1.8085996860743444e-05, "loss": 0.0959, "step": 2866 }, { "epoch": 0.67, "learning_rate": 1.8084505487058608e-05, "loss": 0.0666, "step": 2867 }, { "epoch": 0.67, "learning_rate": 1.8083013594107552e-05, "loss": 0.0621, "step": 2868 }, { "epoch": 0.67, "learning_rate": 1.8081521181986107e-05, "loss": 0.1299, "step": 2869 }, { "epoch": 0.67, "learning_rate": 1.8080028250790128e-05, "loss": 0.0198, "step": 2870 }, { "epoch": 0.67, "learning_rate": 1.8078534800615504e-05, "loss": 0.0881, "step": 2871 }, { "epoch": 0.67, "learning_rate": 1.807704083155816e-05, "loss": 0.0355, "step": 2872 }, { "epoch": 0.67, "learning_rate": 1.807554634371405e-05, "loss": 0.0504, "step": 2873 }, { "epoch": 0.67, "learning_rate": 1.807405133717917e-05, "loss": 0.071, "step": 2874 }, { "epoch": 0.67, "learning_rate": 1.8072555812049542e-05, "loss": 0.0464, "step": 2875 }, { "epoch": 0.67, "learning_rate": 1.807105976842122e-05, "loss": 0.01, "step": 2876 }, { "epoch": 0.68, "learning_rate": 1.80695632063903e-05, "loss": 0.0545, "step": 2877 }, { "epoch": 0.68, "learning_rate": 1.8068066126052907e-05, "loss": 0.0866, "step": 2878 }, { "epoch": 0.68, "learning_rate": 1.806656852750519e-05, "loss": 0.0553, "step": 2879 }, { "epoch": 0.68, "learning_rate": 1.8065070410843345e-05, "loss": 0.0148, "step": 2880 }, { "epoch": 0.68, "learning_rate": 1.8063571776163596e-05, "loss": 0.1591, "step": 2881 }, { "epoch": 0.68, "learning_rate": 1.80620726235622e-05, "loss": 0.011, "step": 2882 }, { "epoch": 0.68, "learning_rate": 1.8060572953135446e-05, "loss": 0.0129, "step": 2883 }, { "epoch": 0.68, "learning_rate": 1.8059072764979657e-05, "loss": 0.0965, "step": 2884 }, { "epoch": 0.68, "learning_rate": 1.8057572059191192e-05, "loss": 0.0493, "step": 2885 }, { "epoch": 0.68, "learning_rate": 1.8056070835866437e-05, "loss": 0.0573, "step": 2886 }, { "epoch": 0.68, "learning_rate": 1.805456909510182e-05, "loss": 0.0283, "step": 2887 }, { "epoch": 0.68, "learning_rate": 1.8053066836993793e-05, "loss": 0.0138, "step": 2888 }, { "epoch": 0.68, "learning_rate": 1.805156406163885e-05, "loss": 0.1397, "step": 2889 }, { "epoch": 0.68, "learning_rate": 1.805006076913351e-05, "loss": 0.0157, "step": 2890 }, { "epoch": 0.68, "learning_rate": 1.8048556959574334e-05, "loss": 0.0285, "step": 2891 }, { "epoch": 0.68, "learning_rate": 1.8047052633057907e-05, "loss": 0.1035, "step": 2892 }, { "epoch": 0.68, "learning_rate": 1.8045547789680852e-05, "loss": 0.0501, "step": 2893 }, { "epoch": 0.68, "learning_rate": 1.8044042429539827e-05, "loss": 0.1146, "step": 2894 }, { "epoch": 0.68, "learning_rate": 1.8042536552731523e-05, "loss": 0.0211, "step": 2895 }, { "epoch": 0.68, "learning_rate": 1.804103015935266e-05, "loss": 0.0502, "step": 2896 }, { "epoch": 0.68, "learning_rate": 1.8039523249499987e-05, "loss": 0.1358, "step": 2897 }, { "epoch": 0.68, "learning_rate": 1.8038015823270302e-05, "loss": 0.0735, "step": 2898 }, { "epoch": 0.68, "learning_rate": 1.8036507880760422e-05, "loss": 0.0281, "step": 2899 }, { "epoch": 0.68, "learning_rate": 1.80349994220672e-05, "loss": 0.0739, "step": 2900 }, { "epoch": 0.68, "learning_rate": 1.803349044728753e-05, "loss": 0.0434, "step": 2901 }, { "epoch": 0.68, "learning_rate": 1.803198095651833e-05, "loss": 0.0612, "step": 2902 }, { "epoch": 0.68, "learning_rate": 1.8030470949856554e-05, "loss": 0.0808, "step": 2903 }, { "epoch": 0.68, "learning_rate": 1.8028960427399188e-05, "loss": 0.1642, "step": 2904 }, { "epoch": 0.68, "learning_rate": 1.802744938924325e-05, "loss": 0.0831, "step": 2905 }, { "epoch": 0.68, "learning_rate": 1.802593783548581e-05, "loss": 0.15, "step": 2906 }, { "epoch": 0.68, "learning_rate": 1.8024425766223932e-05, "loss": 0.0481, "step": 2907 }, { "epoch": 0.68, "learning_rate": 1.8022913181554748e-05, "loss": 0.0615, "step": 2908 }, { "epoch": 0.68, "learning_rate": 1.8021400081575412e-05, "loss": 0.0624, "step": 2909 }, { "epoch": 0.68, "learning_rate": 1.801988646638311e-05, "loss": 0.082, "step": 2910 }, { "epoch": 0.68, "learning_rate": 1.8018372336075054e-05, "loss": 0.0907, "step": 2911 }, { "epoch": 0.68, "learning_rate": 1.8016857690748503e-05, "loss": 0.0682, "step": 2912 }, { "epoch": 0.68, "learning_rate": 1.801534253050074e-05, "loss": 0.1287, "step": 2913 }, { "epoch": 0.68, "learning_rate": 1.8013826855429085e-05, "loss": 0.0646, "step": 2914 }, { "epoch": 0.68, "learning_rate": 1.8012310665630888e-05, "loss": 0.0508, "step": 2915 }, { "epoch": 0.68, "learning_rate": 1.8010793961203537e-05, "loss": 0.0296, "step": 2916 }, { "epoch": 0.68, "learning_rate": 1.8009276742244443e-05, "loss": 0.0631, "step": 2917 }, { "epoch": 0.68, "learning_rate": 1.8007759008851062e-05, "loss": 0.0523, "step": 2918 }, { "epoch": 0.69, "learning_rate": 1.8006240761120875e-05, "loss": 0.0603, "step": 2919 }, { "epoch": 0.69, "learning_rate": 1.8004721999151403e-05, "loss": 0.0316, "step": 2920 }, { "epoch": 0.69, "learning_rate": 1.8003202723040188e-05, "loss": 0.1075, "step": 2921 }, { "epoch": 0.69, "learning_rate": 1.800168293288482e-05, "loss": 0.0436, "step": 2922 }, { "epoch": 0.69, "learning_rate": 1.800016262878291e-05, "loss": 0.1813, "step": 2923 }, { "epoch": 0.69, "learning_rate": 1.7998641810832114e-05, "loss": 0.1318, "step": 2924 }, { "epoch": 0.69, "learning_rate": 1.7997120479130105e-05, "loss": 0.0471, "step": 2925 }, { "epoch": 0.69, "learning_rate": 1.7995598633774604e-05, "loss": 0.0613, "step": 2926 }, { "epoch": 0.69, "learning_rate": 1.7994076274863356e-05, "loss": 0.0714, "step": 2927 }, { "epoch": 0.69, "learning_rate": 1.7992553402494142e-05, "loss": 0.0778, "step": 2928 }, { "epoch": 0.69, "learning_rate": 1.7991030016764777e-05, "loss": 0.0403, "step": 2929 }, { "epoch": 0.69, "learning_rate": 1.798950611777311e-05, "loss": 0.0312, "step": 2930 }, { "epoch": 0.69, "learning_rate": 1.798798170561701e-05, "loss": 0.0565, "step": 2931 }, { "epoch": 0.69, "learning_rate": 1.7986456780394403e-05, "loss": 0.0671, "step": 2932 }, { "epoch": 0.69, "learning_rate": 1.7984931342203228e-05, "loss": 0.0571, "step": 2933 }, { "epoch": 0.69, "learning_rate": 1.7983405391141463e-05, "loss": 0.0914, "step": 2934 }, { "epoch": 0.69, "learning_rate": 1.7981878927307125e-05, "loss": 0.0425, "step": 2935 }, { "epoch": 0.69, "learning_rate": 1.7980351950798255e-05, "loss": 0.0172, "step": 2936 }, { "epoch": 0.69, "learning_rate": 1.7978824461712923e-05, "loss": 0.2365, "step": 2937 }, { "epoch": 0.69, "learning_rate": 1.7977296460149253e-05, "loss": 0.0372, "step": 2938 }, { "epoch": 0.69, "learning_rate": 1.7975767946205382e-05, "loss": 0.0555, "step": 2939 }, { "epoch": 0.69, "learning_rate": 1.7974238919979484e-05, "loss": 0.0398, "step": 2940 }, { "epoch": 0.69, "learning_rate": 1.797270938156977e-05, "loss": 0.0653, "step": 2941 }, { "epoch": 0.69, "learning_rate": 1.797117933107448e-05, "loss": 0.0914, "step": 2942 }, { "epoch": 0.69, "learning_rate": 1.7969648768591895e-05, "loss": 0.0493, "step": 2943 }, { "epoch": 0.69, "learning_rate": 1.7968117694220314e-05, "loss": 0.0859, "step": 2944 }, { "epoch": 0.69, "learning_rate": 1.7966586108058087e-05, "loss": 0.0767, "step": 2945 }, { "epoch": 0.69, "learning_rate": 1.796505401020358e-05, "loss": 0.0322, "step": 2946 }, { "epoch": 0.69, "learning_rate": 1.79635214007552e-05, "loss": 0.056, "step": 2947 }, { "epoch": 0.69, "learning_rate": 1.796198827981139e-05, "loss": 0.0414, "step": 2948 }, { "epoch": 0.69, "learning_rate": 1.796045464747062e-05, "loss": 0.0398, "step": 2949 }, { "epoch": 0.69, "learning_rate": 1.7958920503831396e-05, "loss": 0.0282, "step": 2950 }, { "epoch": 0.69, "learning_rate": 1.7957385848992254e-05, "loss": 0.0946, "step": 2951 }, { "epoch": 0.69, "learning_rate": 1.795585068305176e-05, "loss": 0.0389, "step": 2952 }, { "epoch": 0.69, "learning_rate": 1.795431500610853e-05, "loss": 0.035, "step": 2953 }, { "epoch": 0.69, "learning_rate": 1.795277881826119e-05, "loss": 0.053, "step": 2954 }, { "epoch": 0.69, "learning_rate": 1.7951242119608415e-05, "loss": 0.0362, "step": 2955 }, { "epoch": 0.69, "learning_rate": 1.7949704910248903e-05, "loss": 0.0626, "step": 2956 }, { "epoch": 0.69, "learning_rate": 1.794816719028139e-05, "loss": 0.2676, "step": 2957 }, { "epoch": 0.69, "learning_rate": 1.794662895980464e-05, "loss": 0.0265, "step": 2958 }, { "epoch": 0.69, "learning_rate": 1.7945090218917456e-05, "loss": 0.0353, "step": 2959 }, { "epoch": 0.69, "learning_rate": 1.7943550967718675e-05, "loss": 0.0608, "step": 2960 }, { "epoch": 0.69, "learning_rate": 1.794201120630716e-05, "loss": 0.0214, "step": 2961 }, { "epoch": 0.7, "learning_rate": 1.7940470934781803e-05, "loss": 0.106, "step": 2962 }, { "epoch": 0.7, "learning_rate": 1.7938930153241546e-05, "loss": 0.0871, "step": 2963 }, { "epoch": 0.7, "learning_rate": 1.7937388861785347e-05, "loss": 0.0788, "step": 2964 }, { "epoch": 0.7, "learning_rate": 1.7935847060512204e-05, "loss": 0.0642, "step": 2965 }, { "epoch": 0.7, "learning_rate": 1.7934304749521145e-05, "loss": 0.0317, "step": 2966 }, { "epoch": 0.7, "learning_rate": 1.7932761928911233e-05, "loss": 0.0969, "step": 2967 }, { "epoch": 0.7, "learning_rate": 1.7931218598781568e-05, "loss": 0.0648, "step": 2968 }, { "epoch": 0.7, "learning_rate": 1.792967475923127e-05, "loss": 0.0189, "step": 2969 }, { "epoch": 0.7, "learning_rate": 1.7928130410359505e-05, "loss": 0.0349, "step": 2970 }, { "epoch": 0.7, "learning_rate": 1.7926585552265466e-05, "loss": 0.131, "step": 2971 }, { "epoch": 0.7, "learning_rate": 1.7925040185048376e-05, "loss": 0.0855, "step": 2972 }, { "epoch": 0.7, "learning_rate": 1.7923494308807497e-05, "loss": 0.1291, "step": 2973 }, { "epoch": 0.7, "learning_rate": 1.7921947923642114e-05, "loss": 0.0756, "step": 2974 }, { "epoch": 0.7, "learning_rate": 1.792040102965156e-05, "loss": 0.0066, "step": 2975 }, { "epoch": 0.7, "learning_rate": 1.7918853626935187e-05, "loss": 0.0891, "step": 2976 }, { "epoch": 0.7, "learning_rate": 1.7917305715592383e-05, "loss": 0.0303, "step": 2977 }, { "epoch": 0.7, "learning_rate": 1.7915757295722568e-05, "loss": 0.0702, "step": 2978 }, { "epoch": 0.7, "learning_rate": 1.7914208367425207e-05, "loss": 0.0553, "step": 2979 }, { "epoch": 0.7, "learning_rate": 1.7912658930799776e-05, "loss": 0.0742, "step": 2980 }, { "epoch": 0.7, "learning_rate": 1.7911108985945802e-05, "loss": 0.0873, "step": 2981 }, { "epoch": 0.7, "learning_rate": 1.7909558532962835e-05, "loss": 0.049, "step": 2982 }, { "epoch": 0.7, "learning_rate": 1.7908007571950457e-05, "loss": 0.0549, "step": 2983 }, { "epoch": 0.7, "learning_rate": 1.7906456103008295e-05, "loss": 0.0707, "step": 2984 }, { "epoch": 0.7, "learning_rate": 1.7904904126235993e-05, "loss": 0.0406, "step": 2985 }, { "epoch": 0.7, "learning_rate": 1.7903351641733233e-05, "loss": 0.0213, "step": 2986 }, { "epoch": 0.7, "learning_rate": 1.790179864959973e-05, "loss": 0.1139, "step": 2987 }, { "epoch": 0.7, "learning_rate": 1.790024514993524e-05, "loss": 0.0922, "step": 2988 }, { "epoch": 0.7, "learning_rate": 1.789869114283954e-05, "loss": 0.158, "step": 2989 }, { "epoch": 0.7, "learning_rate": 1.7897136628412443e-05, "loss": 0.12, "step": 2990 }, { "epoch": 0.7, "learning_rate": 1.7895581606753793e-05, "loss": 0.0211, "step": 2991 }, { "epoch": 0.7, "learning_rate": 1.7894026077963473e-05, "loss": 0.0832, "step": 2992 }, { "epoch": 0.7, "learning_rate": 1.7892470042141393e-05, "loss": 0.104, "step": 2993 }, { "epoch": 0.7, "learning_rate": 1.7890913499387492e-05, "loss": 0.0552, "step": 2994 }, { "epoch": 0.7, "learning_rate": 1.7889356449801754e-05, "loss": 0.0725, "step": 2995 }, { "epoch": 0.7, "learning_rate": 1.7887798893484183e-05, "loss": 0.1738, "step": 2996 }, { "epoch": 0.7, "learning_rate": 1.7886240830534823e-05, "loss": 0.0679, "step": 2997 }, { "epoch": 0.7, "learning_rate": 1.7884682261053744e-05, "loss": 0.0258, "step": 2998 }, { "epoch": 0.7, "learning_rate": 1.7883123185141058e-05, "loss": 0.0497, "step": 2999 }, { "epoch": 0.7, "learning_rate": 1.7881563602896904e-05, "loss": 0.0565, "step": 3000 }, { "epoch": 0.7, "learning_rate": 1.7880003514421445e-05, "loss": 0.0908, "step": 3001 }, { "epoch": 0.7, "learning_rate": 1.78784429198149e-05, "loss": 0.0544, "step": 3002 }, { "epoch": 0.7, "learning_rate": 1.7876881819177492e-05, "loss": 0.0464, "step": 3003 }, { "epoch": 0.71, "learning_rate": 1.7875320212609498e-05, "loss": 0.028, "step": 3004 }, { "epoch": 0.71, "learning_rate": 1.7873758100211218e-05, "loss": 0.0524, "step": 3005 }, { "epoch": 0.71, "learning_rate": 1.7872195482082982e-05, "loss": 0.0569, "step": 3006 }, { "epoch": 0.71, "learning_rate": 1.787063235832516e-05, "loss": 0.0485, "step": 3007 }, { "epoch": 0.71, "learning_rate": 1.7869068729038158e-05, "loss": 0.0616, "step": 3008 }, { "epoch": 0.71, "learning_rate": 1.7867504594322394e-05, "loss": 0.041, "step": 3009 }, { "epoch": 0.71, "learning_rate": 1.786593995427834e-05, "loss": 0.0292, "step": 3010 }, { "epoch": 0.71, "learning_rate": 1.7864374809006494e-05, "loss": 0.0279, "step": 3011 }, { "epoch": 0.71, "learning_rate": 1.7862809158607383e-05, "loss": 0.1293, "step": 3012 }, { "epoch": 0.71, "learning_rate": 1.7861243003181563e-05, "loss": 0.0898, "step": 3013 }, { "epoch": 0.71, "learning_rate": 1.7859676342829635e-05, "loss": 0.0511, "step": 3014 }, { "epoch": 0.71, "learning_rate": 1.7858109177652224e-05, "loss": 0.0645, "step": 3015 }, { "epoch": 0.71, "learning_rate": 1.785654150774999e-05, "loss": 0.109, "step": 3016 }, { "epoch": 0.71, "learning_rate": 1.785497333322362e-05, "loss": 0.0139, "step": 3017 }, { "epoch": 0.71, "learning_rate": 1.7853404654173837e-05, "loss": 0.0793, "step": 3018 }, { "epoch": 0.71, "learning_rate": 1.78518354707014e-05, "loss": 0.0541, "step": 3019 }, { "epoch": 0.71, "learning_rate": 1.7850265782907097e-05, "loss": 0.086, "step": 3020 }, { "epoch": 0.71, "learning_rate": 1.7848695590891746e-05, "loss": 0.062, "step": 3021 }, { "epoch": 0.71, "learning_rate": 1.7847124894756206e-05, "loss": 0.0356, "step": 3022 }, { "epoch": 0.71, "learning_rate": 1.784555369460136e-05, "loss": 0.1124, "step": 3023 }, { "epoch": 0.71, "learning_rate": 1.7843981990528127e-05, "loss": 0.053, "step": 3024 }, { "epoch": 0.71, "learning_rate": 1.784240978263745e-05, "loss": 0.0208, "step": 3025 }, { "epoch": 0.71, "learning_rate": 1.7840837071030318e-05, "loss": 0.0532, "step": 3026 }, { "epoch": 0.71, "learning_rate": 1.7839263855807744e-05, "loss": 0.0443, "step": 3027 }, { "epoch": 0.71, "learning_rate": 1.7837690137070778e-05, "loss": 0.0476, "step": 3028 }, { "epoch": 0.71, "learning_rate": 1.7836115914920498e-05, "loss": 0.0363, "step": 3029 }, { "epoch": 0.71, "learning_rate": 1.7834541189458013e-05, "loss": 0.0409, "step": 3030 }, { "epoch": 0.71, "learning_rate": 1.7832965960784472e-05, "loss": 0.1634, "step": 3031 }, { "epoch": 0.71, "learning_rate": 1.783139022900105e-05, "loss": 0.0437, "step": 3032 }, { "epoch": 0.71, "learning_rate": 1.7829813994208956e-05, "loss": 0.1113, "step": 3033 }, { "epoch": 0.71, "learning_rate": 1.7828237256509428e-05, "loss": 0.055, "step": 3034 }, { "epoch": 0.71, "learning_rate": 1.7826660016003746e-05, "loss": 0.0813, "step": 3035 }, { "epoch": 0.71, "learning_rate": 1.782508227279321e-05, "loss": 0.1144, "step": 3036 }, { "epoch": 0.71, "learning_rate": 1.7823504026979162e-05, "loss": 0.0348, "step": 3037 }, { "epoch": 0.71, "learning_rate": 1.782192527866297e-05, "loss": 0.0342, "step": 3038 }, { "epoch": 0.71, "learning_rate": 1.7820346027946035e-05, "loss": 0.0801, "step": 3039 }, { "epoch": 0.71, "learning_rate": 1.7818766274929795e-05, "loss": 0.1368, "step": 3040 }, { "epoch": 0.71, "learning_rate": 1.781718601971572e-05, "loss": 0.0643, "step": 3041 }, { "epoch": 0.71, "learning_rate": 1.7815605262405308e-05, "loss": 0.055, "step": 3042 }, { "epoch": 0.71, "learning_rate": 1.7814024003100085e-05, "loss": 0.0881, "step": 3043 }, { "epoch": 0.71, "learning_rate": 1.7812442241901616e-05, "loss": 0.0692, "step": 3044 }, { "epoch": 0.71, "learning_rate": 1.7810859978911502e-05, "loss": 0.0789, "step": 3045 }, { "epoch": 0.71, "learning_rate": 1.7809277214231376e-05, "loss": 0.0771, "step": 3046 }, { "epoch": 0.72, "learning_rate": 1.7807693947962886e-05, "loss": 0.1261, "step": 3047 }, { "epoch": 0.72, "learning_rate": 1.7806110180207733e-05, "loss": 0.0615, "step": 3048 }, { "epoch": 0.72, "learning_rate": 1.780452591106764e-05, "loss": 0.0371, "step": 3049 }, { "epoch": 0.72, "learning_rate": 1.7802941140644363e-05, "loss": 0.0338, "step": 3050 }, { "epoch": 0.72, "learning_rate": 1.7801355869039692e-05, "loss": 0.0164, "step": 3051 }, { "epoch": 0.72, "learning_rate": 1.7799770096355452e-05, "loss": 0.0951, "step": 3052 }, { "epoch": 0.72, "learning_rate": 1.7798183822693498e-05, "loss": 0.0323, "step": 3053 }, { "epoch": 0.72, "learning_rate": 1.7796597048155707e-05, "loss": 0.0829, "step": 3054 }, { "epoch": 0.72, "learning_rate": 1.7795009772844004e-05, "loss": 0.0677, "step": 3055 }, { "epoch": 0.72, "learning_rate": 1.7793421996860342e-05, "loss": 0.01, "step": 3056 }, { "epoch": 0.72, "learning_rate": 1.7791833720306696e-05, "loss": 0.032, "step": 3057 }, { "epoch": 0.72, "learning_rate": 1.7790244943285087e-05, "loss": 0.078, "step": 3058 }, { "epoch": 0.72, "learning_rate": 1.7788655665897555e-05, "loss": 0.0552, "step": 3059 }, { "epoch": 0.72, "learning_rate": 1.7787065888246188e-05, "loss": 0.0768, "step": 3060 }, { "epoch": 0.72, "learning_rate": 1.778547561043309e-05, "loss": 0.0817, "step": 3061 }, { "epoch": 0.72, "learning_rate": 1.7783884832560407e-05, "loss": 0.0632, "step": 3062 }, { "epoch": 0.72, "learning_rate": 1.7782293554730312e-05, "loss": 0.0827, "step": 3063 }, { "epoch": 0.72, "learning_rate": 1.7780701777045017e-05, "loss": 0.0785, "step": 3064 }, { "epoch": 0.72, "learning_rate": 1.777910949960676e-05, "loss": 0.0263, "step": 3065 }, { "epoch": 0.72, "learning_rate": 1.777751672251781e-05, "loss": 0.0469, "step": 3066 }, { "epoch": 0.72, "learning_rate": 1.777592344588047e-05, "loss": 0.1029, "step": 3067 }, { "epoch": 0.72, "learning_rate": 1.7774329669797083e-05, "loss": 0.0137, "step": 3068 }, { "epoch": 0.72, "learning_rate": 1.7772735394370008e-05, "loss": 0.0424, "step": 3069 }, { "epoch": 0.72, "learning_rate": 1.7771140619701653e-05, "loss": 0.0657, "step": 3070 }, { "epoch": 0.72, "learning_rate": 1.7769545345894442e-05, "loss": 0.0812, "step": 3071 }, { "epoch": 0.72, "learning_rate": 1.7767949573050847e-05, "loss": 0.0172, "step": 3072 }, { "epoch": 0.72, "learning_rate": 1.776635330127336e-05, "loss": 0.0287, "step": 3073 }, { "epoch": 0.72, "learning_rate": 1.7764756530664508e-05, "loss": 0.029, "step": 3074 }, { "epoch": 0.72, "learning_rate": 1.7763159261326856e-05, "loss": 0.0947, "step": 3075 }, { "epoch": 0.72, "learning_rate": 1.7761561493362993e-05, "loss": 0.0422, "step": 3076 }, { "epoch": 0.72, "learning_rate": 1.7759963226875544e-05, "loss": 0.0901, "step": 3077 }, { "epoch": 0.72, "learning_rate": 1.7758364461967162e-05, "loss": 0.0521, "step": 3078 }, { "epoch": 0.72, "learning_rate": 1.775676519874054e-05, "loss": 0.0414, "step": 3079 }, { "epoch": 0.72, "learning_rate": 1.7755165437298398e-05, "loss": 0.0577, "step": 3080 }, { "epoch": 0.72, "learning_rate": 1.7753565177743484e-05, "loss": 0.0481, "step": 3081 }, { "epoch": 0.72, "learning_rate": 1.7751964420178588e-05, "loss": 0.1721, "step": 3082 }, { "epoch": 0.72, "learning_rate": 1.7750363164706523e-05, "loss": 0.0584, "step": 3083 }, { "epoch": 0.72, "learning_rate": 1.774876141143014e-05, "loss": 0.098, "step": 3084 }, { "epoch": 0.72, "learning_rate": 1.7747159160452316e-05, "loss": 0.0558, "step": 3085 }, { "epoch": 0.72, "learning_rate": 1.7745556411875965e-05, "loss": 0.0257, "step": 3086 }, { "epoch": 0.72, "learning_rate": 1.7743953165804033e-05, "loss": 0.0956, "step": 3087 }, { "epoch": 0.72, "learning_rate": 1.774234942233949e-05, "loss": 0.0953, "step": 3088 }, { "epoch": 0.72, "learning_rate": 1.7740745181585352e-05, "loss": 0.0269, "step": 3089 }, { "epoch": 0.73, "learning_rate": 1.773914044364465e-05, "loss": 0.032, "step": 3090 }, { "epoch": 0.73, "learning_rate": 1.7737535208620468e-05, "loss": 0.1037, "step": 3091 }, { "epoch": 0.73, "learning_rate": 1.77359294766159e-05, "loss": 0.0536, "step": 3092 }, { "epoch": 0.73, "learning_rate": 1.7734323247734084e-05, "loss": 0.0328, "step": 3093 }, { "epoch": 0.73, "learning_rate": 1.773271652207819e-05, "loss": 0.0428, "step": 3094 }, { "epoch": 0.73, "learning_rate": 1.7731109299751416e-05, "loss": 0.123, "step": 3095 }, { "epoch": 0.73, "learning_rate": 1.7729501580856997e-05, "loss": 0.0363, "step": 3096 }, { "epoch": 0.73, "learning_rate": 1.772789336549819e-05, "loss": 0.0355, "step": 3097 }, { "epoch": 0.73, "learning_rate": 1.7726284653778297e-05, "loss": 0.0444, "step": 3098 }, { "epoch": 0.73, "learning_rate": 1.772467544580064e-05, "loss": 0.0322, "step": 3099 }, { "epoch": 0.73, "learning_rate": 1.772306574166858e-05, "loss": 0.0337, "step": 3100 }, { "epoch": 0.73, "learning_rate": 1.7721455541485513e-05, "loss": 0.0906, "step": 3101 }, { "epoch": 0.73, "learning_rate": 1.7719844845354856e-05, "loss": 0.014, "step": 3102 }, { "epoch": 0.73, "learning_rate": 1.7718233653380063e-05, "loss": 0.0826, "step": 3103 }, { "epoch": 0.73, "learning_rate": 1.771662196566462e-05, "loss": 0.0646, "step": 3104 }, { "epoch": 0.73, "learning_rate": 1.771500978231205e-05, "loss": 0.0793, "step": 3105 }, { "epoch": 0.73, "learning_rate": 1.77133971034259e-05, "loss": 0.0153, "step": 3106 }, { "epoch": 0.73, "learning_rate": 1.7711783929109758e-05, "loss": 0.0257, "step": 3107 }, { "epoch": 0.73, "learning_rate": 1.771017025946723e-05, "loss": 0.076, "step": 3108 }, { "epoch": 0.73, "learning_rate": 1.7708556094601964e-05, "loss": 0.0411, "step": 3109 }, { "epoch": 0.73, "learning_rate": 1.7706941434617637e-05, "loss": 0.025, "step": 3110 }, { "epoch": 0.73, "learning_rate": 1.7705326279617962e-05, "loss": 0.0482, "step": 3111 }, { "epoch": 0.73, "learning_rate": 1.7703710629706674e-05, "loss": 0.0714, "step": 3112 }, { "epoch": 0.73, "learning_rate": 1.7702094484987553e-05, "loss": 0.0868, "step": 3113 }, { "epoch": 0.73, "learning_rate": 1.77004778455644e-05, "loss": 0.0251, "step": 3114 }, { "epoch": 0.73, "learning_rate": 1.769886071154105e-05, "loss": 0.0245, "step": 3115 }, { "epoch": 0.73, "learning_rate": 1.7697243083021373e-05, "loss": 0.0888, "step": 3116 }, { "epoch": 0.73, "learning_rate": 1.7695624960109267e-05, "loss": 0.0294, "step": 3117 }, { "epoch": 0.73, "learning_rate": 1.7694006342908668e-05, "loss": 0.0586, "step": 3118 }, { "epoch": 0.73, "learning_rate": 1.769238723152354e-05, "loss": 0.0953, "step": 3119 }, { "epoch": 0.73, "learning_rate": 1.7690767626057867e-05, "loss": 0.0524, "step": 3120 }, { "epoch": 0.73, "learning_rate": 1.768914752661569e-05, "loss": 0.0554, "step": 3121 }, { "epoch": 0.73, "learning_rate": 1.768752693330106e-05, "loss": 0.1171, "step": 3122 }, { "epoch": 0.73, "learning_rate": 1.7685905846218068e-05, "loss": 0.0787, "step": 3123 }, { "epoch": 0.73, "learning_rate": 1.768428426547084e-05, "loss": 0.0759, "step": 3124 }, { "epoch": 0.73, "learning_rate": 1.7682662191163524e-05, "loss": 0.0727, "step": 3125 }, { "epoch": 0.73, "learning_rate": 1.768103962340031e-05, "loss": 0.0594, "step": 3126 }, { "epoch": 0.73, "learning_rate": 1.7679416562285413e-05, "loss": 0.1269, "step": 3127 }, { "epoch": 0.73, "learning_rate": 1.7677793007923085e-05, "loss": 0.0569, "step": 3128 }, { "epoch": 0.73, "learning_rate": 1.7676168960417602e-05, "loss": 0.0763, "step": 3129 }, { "epoch": 0.73, "learning_rate": 1.7674544419873282e-05, "loss": 0.0333, "step": 3130 }, { "epoch": 0.73, "learning_rate": 1.7672919386394462e-05, "loss": 0.0415, "step": 3131 }, { "epoch": 0.74, "learning_rate": 1.767129386008552e-05, "loss": 0.1379, "step": 3132 }, { "epoch": 0.74, "learning_rate": 1.766966784105087e-05, "loss": 0.0346, "step": 3133 }, { "epoch": 0.74, "learning_rate": 1.7668041329394938e-05, "loss": 0.0447, "step": 3134 }, { "epoch": 0.74, "learning_rate": 1.7666414325222202e-05, "loss": 0.0999, "step": 3135 }, { "epoch": 0.74, "learning_rate": 1.766478682863717e-05, "loss": 0.0239, "step": 3136 }, { "epoch": 0.74, "learning_rate": 1.7663158839744367e-05, "loss": 0.0701, "step": 3137 }, { "epoch": 0.74, "learning_rate": 1.766153035864836e-05, "loss": 0.0278, "step": 3138 }, { "epoch": 0.74, "learning_rate": 1.7659901385453747e-05, "loss": 0.0238, "step": 3139 }, { "epoch": 0.74, "learning_rate": 1.7658271920265157e-05, "loss": 0.0591, "step": 3140 }, { "epoch": 0.74, "learning_rate": 1.7656641963187248e-05, "loss": 0.1358, "step": 3141 }, { "epoch": 0.74, "learning_rate": 1.7655011514324717e-05, "loss": 0.0422, "step": 3142 }, { "epoch": 0.74, "learning_rate": 1.7653380573782285e-05, "loss": 0.0668, "step": 3143 }, { "epoch": 0.74, "learning_rate": 1.76517491416647e-05, "loss": 0.0464, "step": 3144 }, { "epoch": 0.74, "learning_rate": 1.765011721807676e-05, "loss": 0.0598, "step": 3145 }, { "epoch": 0.74, "learning_rate": 1.7648484803123276e-05, "loss": 0.0806, "step": 3146 }, { "epoch": 0.74, "learning_rate": 1.76468518969091e-05, "loss": 0.065, "step": 3147 }, { "epoch": 0.74, "learning_rate": 1.7645218499539115e-05, "loss": 0.033, "step": 3148 }, { "epoch": 0.74, "learning_rate": 1.7643584611118226e-05, "loss": 0.078, "step": 3149 }, { "epoch": 0.74, "learning_rate": 1.7641950231751387e-05, "loss": 0.1034, "step": 3150 }, { "epoch": 0.74, "learning_rate": 1.764031536154357e-05, "loss": 0.0762, "step": 3151 }, { "epoch": 0.74, "learning_rate": 1.7638680000599784e-05, "loss": 0.0801, "step": 3152 }, { "epoch": 0.74, "learning_rate": 1.7637044149025064e-05, "loss": 0.117, "step": 3153 }, { "epoch": 0.74, "learning_rate": 1.763540780692448e-05, "loss": 0.0847, "step": 3154 }, { "epoch": 0.74, "learning_rate": 1.763377097440314e-05, "loss": 0.0453, "step": 3155 }, { "epoch": 0.74, "learning_rate": 1.763213365156617e-05, "loss": 0.0414, "step": 3156 }, { "epoch": 0.74, "learning_rate": 1.763049583851874e-05, "loss": 0.0626, "step": 3157 }, { "epoch": 0.74, "learning_rate": 1.762885753536605e-05, "loss": 0.0657, "step": 3158 }, { "epoch": 0.74, "learning_rate": 1.762721874221332e-05, "loss": 0.1202, "step": 3159 }, { "epoch": 0.74, "learning_rate": 1.762557945916581e-05, "loss": 0.0536, "step": 3160 }, { "epoch": 0.74, "learning_rate": 1.7623939686328814e-05, "loss": 0.0898, "step": 3161 }, { "epoch": 0.74, "learning_rate": 1.7622299423807656e-05, "loss": 0.0776, "step": 3162 }, { "epoch": 0.74, "learning_rate": 1.762065867170769e-05, "loss": 0.0596, "step": 3163 }, { "epoch": 0.74, "learning_rate": 1.7619017430134293e-05, "loss": 0.0773, "step": 3164 }, { "epoch": 0.74, "learning_rate": 1.761737569919289e-05, "loss": 0.0954, "step": 3165 }, { "epoch": 0.74, "learning_rate": 1.7615733478988924e-05, "loss": 0.0608, "step": 3166 }, { "epoch": 0.74, "learning_rate": 1.761409076962788e-05, "loss": 0.0268, "step": 3167 }, { "epoch": 0.74, "learning_rate": 1.761244757121526e-05, "loss": 0.1026, "step": 3168 }, { "epoch": 0.74, "learning_rate": 1.7610803883856616e-05, "loss": 0.0412, "step": 3169 }, { "epoch": 0.74, "learning_rate": 1.7609159707657523e-05, "loss": 0.0633, "step": 3170 }, { "epoch": 0.74, "learning_rate": 1.7607515042723574e-05, "loss": 0.063, "step": 3171 }, { "epoch": 0.74, "learning_rate": 1.760586988916041e-05, "loss": 0.0761, "step": 3172 }, { "epoch": 0.74, "learning_rate": 1.7604224247073706e-05, "loss": 0.0552, "step": 3173 }, { "epoch": 0.74, "learning_rate": 1.760257811656916e-05, "loss": 0.0781, "step": 3174 }, { "epoch": 0.75, "learning_rate": 1.7600931497752492e-05, "loss": 0.049, "step": 3175 }, { "epoch": 0.75, "learning_rate": 1.7599284390729474e-05, "loss": 0.0636, "step": 3176 }, { "epoch": 0.75, "learning_rate": 1.7597636795605895e-05, "loss": 0.087, "step": 3177 }, { "epoch": 0.75, "learning_rate": 1.759598871248758e-05, "loss": 0.0621, "step": 3178 }, { "epoch": 0.75, "learning_rate": 1.759434014148039e-05, "loss": 0.0576, "step": 3179 }, { "epoch": 0.75, "learning_rate": 1.7592691082690206e-05, "loss": 0.1027, "step": 3180 }, { "epoch": 0.75, "learning_rate": 1.7591041536222952e-05, "loss": 0.1474, "step": 3181 }, { "epoch": 0.75, "learning_rate": 1.758939150218457e-05, "loss": 0.0547, "step": 3182 }, { "epoch": 0.75, "learning_rate": 1.7587740980681055e-05, "loss": 0.0624, "step": 3183 }, { "epoch": 0.75, "learning_rate": 1.7586089971818404e-05, "loss": 0.0498, "step": 3184 }, { "epoch": 0.75, "learning_rate": 1.7584438475702673e-05, "loss": 0.0212, "step": 3185 }, { "epoch": 0.75, "learning_rate": 1.7582786492439933e-05, "loss": 0.091, "step": 3186 }, { "epoch": 0.75, "learning_rate": 1.7581134022136288e-05, "loss": 0.0168, "step": 3187 }, { "epoch": 0.75, "learning_rate": 1.757948106489788e-05, "loss": 0.038, "step": 3188 }, { "epoch": 0.75, "learning_rate": 1.757782762083087e-05, "loss": 0.0934, "step": 3189 }, { "epoch": 0.75, "learning_rate": 1.757617369004147e-05, "loss": 0.1464, "step": 3190 }, { "epoch": 0.75, "learning_rate": 1.757451927263591e-05, "loss": 0.0367, "step": 3191 }, { "epoch": 0.75, "learning_rate": 1.7572864368720442e-05, "loss": 0.041, "step": 3192 }, { "epoch": 0.75, "learning_rate": 1.757120897840137e-05, "loss": 0.1531, "step": 3193 }, { "epoch": 0.75, "learning_rate": 1.756955310178502e-05, "loss": 0.0115, "step": 3194 }, { "epoch": 0.75, "learning_rate": 1.756789673897774e-05, "loss": 0.0182, "step": 3195 }, { "epoch": 0.75, "learning_rate": 1.756623989008593e-05, "loss": 0.0424, "step": 3196 }, { "epoch": 0.75, "learning_rate": 1.7564582555215994e-05, "loss": 0.0832, "step": 3197 }, { "epoch": 0.75, "learning_rate": 1.7562924734474395e-05, "loss": 0.0545, "step": 3198 }, { "epoch": 0.75, "learning_rate": 1.756126642796761e-05, "loss": 0.076, "step": 3199 }, { "epoch": 0.75, "learning_rate": 1.755960763580215e-05, "loss": 0.112, "step": 3200 }, { "epoch": 0.75, "learning_rate": 1.7557948358084566e-05, "loss": 0.1449, "step": 3201 }, { "epoch": 0.75, "learning_rate": 1.755628859492142e-05, "loss": 0.0716, "step": 3202 }, { "epoch": 0.75, "learning_rate": 1.7554628346419334e-05, "loss": 0.0239, "step": 3203 }, { "epoch": 0.75, "learning_rate": 1.7552967612684934e-05, "loss": 0.0558, "step": 3204 }, { "epoch": 0.75, "learning_rate": 1.755130639382489e-05, "loss": 0.0194, "step": 3205 }, { "epoch": 0.75, "learning_rate": 1.754964468994591e-05, "loss": 0.0543, "step": 3206 }, { "epoch": 0.75, "learning_rate": 1.7547982501154713e-05, "loss": 0.014, "step": 3207 }, { "epoch": 0.75, "learning_rate": 1.7546319827558068e-05, "loss": 0.0769, "step": 3208 }, { "epoch": 0.75, "learning_rate": 1.7544656669262766e-05, "loss": 0.0823, "step": 3209 }, { "epoch": 0.75, "learning_rate": 1.7542993026375636e-05, "loss": 0.0239, "step": 3210 }, { "epoch": 0.75, "learning_rate": 1.7541328899003525e-05, "loss": 0.0602, "step": 3211 }, { "epoch": 0.75, "learning_rate": 1.7539664287253327e-05, "loss": 0.0393, "step": 3212 }, { "epoch": 0.75, "learning_rate": 1.753799919123196e-05, "loss": 0.0383, "step": 3213 }, { "epoch": 0.75, "learning_rate": 1.7536333611046367e-05, "loss": 0.0973, "step": 3214 }, { "epoch": 0.75, "learning_rate": 1.753466754680353e-05, "loss": 0.024, "step": 3215 }, { "epoch": 0.75, "learning_rate": 1.7533000998610465e-05, "loss": 0.0198, "step": 3216 }, { "epoch": 0.76, "learning_rate": 1.7531333966574205e-05, "loss": 0.0599, "step": 3217 }, { "epoch": 0.76, "learning_rate": 1.752966645080183e-05, "loss": 0.1056, "step": 3218 }, { "epoch": 0.76, "learning_rate": 1.7527998451400443e-05, "loss": 0.0451, "step": 3219 }, { "epoch": 0.76, "learning_rate": 1.7526329968477178e-05, "loss": 0.0718, "step": 3220 }, { "epoch": 0.76, "learning_rate": 1.75246610021392e-05, "loss": 0.0616, "step": 3221 }, { "epoch": 0.76, "learning_rate": 1.752299155249371e-05, "loss": 0.0733, "step": 3222 }, { "epoch": 0.76, "learning_rate": 1.7521321619647937e-05, "loss": 0.0133, "step": 3223 }, { "epoch": 0.76, "learning_rate": 1.7519651203709134e-05, "loss": 0.0191, "step": 3224 }, { "epoch": 0.76, "learning_rate": 1.75179803047846e-05, "loss": 0.0381, "step": 3225 }, { "epoch": 0.76, "learning_rate": 1.7516308922981647e-05, "loss": 0.0107, "step": 3226 }, { "epoch": 0.76, "learning_rate": 1.7514637058407636e-05, "loss": 0.0759, "step": 3227 }, { "epoch": 0.76, "learning_rate": 1.7512964711169945e-05, "loss": 0.0921, "step": 3228 }, { "epoch": 0.76, "learning_rate": 1.751129188137599e-05, "loss": 0.0663, "step": 3229 }, { "epoch": 0.76, "learning_rate": 1.750961856913322e-05, "loss": 0.0594, "step": 3230 }, { "epoch": 0.76, "learning_rate": 1.7507944774549108e-05, "loss": 0.0469, "step": 3231 }, { "epoch": 0.76, "learning_rate": 1.7506270497731163e-05, "loss": 0.0263, "step": 3232 }, { "epoch": 0.76, "learning_rate": 1.750459573878692e-05, "loss": 0.1485, "step": 3233 }, { "epoch": 0.76, "learning_rate": 1.7502920497823953e-05, "loss": 0.0352, "step": 3234 }, { "epoch": 0.76, "learning_rate": 1.7501244774949862e-05, "loss": 0.0868, "step": 3235 }, { "epoch": 0.76, "learning_rate": 1.7499568570272277e-05, "loss": 0.0509, "step": 3236 }, { "epoch": 0.76, "learning_rate": 1.7497891883898856e-05, "loss": 0.0953, "step": 3237 }, { "epoch": 0.76, "learning_rate": 1.7496214715937297e-05, "loss": 0.0909, "step": 3238 }, { "epoch": 0.76, "learning_rate": 1.7494537066495327e-05, "loss": 0.0132, "step": 3239 }, { "epoch": 0.76, "learning_rate": 1.74928589356807e-05, "loss": 0.075, "step": 3240 }, { "epoch": 0.76, "learning_rate": 1.7491180323601194e-05, "loss": 0.0508, "step": 3241 }, { "epoch": 0.76, "learning_rate": 1.7489501230364635e-05, "loss": 0.0379, "step": 3242 }, { "epoch": 0.76, "learning_rate": 1.7487821656078866e-05, "loss": 0.1631, "step": 3243 }, { "epoch": 0.76, "learning_rate": 1.748614160085177e-05, "loss": 0.0908, "step": 3244 }, { "epoch": 0.76, "learning_rate": 1.7484461064791253e-05, "loss": 0.0684, "step": 3245 }, { "epoch": 0.76, "learning_rate": 1.7482780048005253e-05, "loss": 0.021, "step": 3246 }, { "epoch": 0.76, "learning_rate": 1.748109855060175e-05, "loss": 0.0369, "step": 3247 }, { "epoch": 0.76, "learning_rate": 1.7479416572688735e-05, "loss": 0.0417, "step": 3248 }, { "epoch": 0.76, "learning_rate": 1.747773411437425e-05, "loss": 0.0396, "step": 3249 }, { "epoch": 0.76, "learning_rate": 1.7476051175766356e-05, "loss": 0.0578, "step": 3250 }, { "epoch": 0.76, "learning_rate": 1.7474367756973148e-05, "loss": 0.0458, "step": 3251 }, { "epoch": 0.76, "learning_rate": 1.7472683858102753e-05, "loss": 0.0829, "step": 3252 }, { "epoch": 0.76, "learning_rate": 1.7470999479263323e-05, "loss": 0.067, "step": 3253 }, { "epoch": 0.76, "learning_rate": 1.746931462056305e-05, "loss": 0.0948, "step": 3254 }, { "epoch": 0.76, "learning_rate": 1.7467629282110152e-05, "loss": 0.099, "step": 3255 }, { "epoch": 0.76, "learning_rate": 1.7465943464012874e-05, "loss": 0.054, "step": 3256 }, { "epoch": 0.76, "learning_rate": 1.74642571663795e-05, "loss": 0.0243, "step": 3257 }, { "epoch": 0.76, "learning_rate": 1.7462570389318337e-05, "loss": 0.06, "step": 3258 }, { "epoch": 0.76, "learning_rate": 1.7460883132937733e-05, "loss": 0.0316, "step": 3259 }, { "epoch": 0.77, "learning_rate": 1.745919539734605e-05, "loss": 0.0617, "step": 3260 }, { "epoch": 0.77, "learning_rate": 1.7457507182651697e-05, "loss": 0.0393, "step": 3261 }, { "epoch": 0.77, "learning_rate": 1.745581848896311e-05, "loss": 0.0763, "step": 3262 }, { "epoch": 0.77, "learning_rate": 1.745412931638875e-05, "loss": 0.074, "step": 3263 }, { "epoch": 0.77, "learning_rate": 1.745243966503711e-05, "loss": 0.0598, "step": 3264 }, { "epoch": 0.77, "learning_rate": 1.7450749535016722e-05, "loss": 0.1009, "step": 3265 }, { "epoch": 0.77, "learning_rate": 1.7449058926436137e-05, "loss": 0.0485, "step": 3266 }, { "epoch": 0.77, "learning_rate": 1.7447367839403947e-05, "loss": 0.0383, "step": 3267 }, { "epoch": 0.77, "learning_rate": 1.7445676274028764e-05, "loss": 0.0182, "step": 3268 }, { "epoch": 0.77, "learning_rate": 1.744398423041924e-05, "loss": 0.0474, "step": 3269 }, { "epoch": 0.77, "learning_rate": 1.7442291708684064e-05, "loss": 0.0614, "step": 3270 }, { "epoch": 0.77, "learning_rate": 1.744059870893193e-05, "loss": 0.0597, "step": 3271 }, { "epoch": 0.77, "learning_rate": 1.743890523127159e-05, "loss": 0.1081, "step": 3272 }, { "epoch": 0.77, "learning_rate": 1.7437211275811813e-05, "loss": 0.0406, "step": 3273 }, { "epoch": 0.77, "learning_rate": 1.7435516842661403e-05, "loss": 0.0381, "step": 3274 }, { "epoch": 0.77, "learning_rate": 1.7433821931929186e-05, "loss": 0.0515, "step": 3275 }, { "epoch": 0.77, "learning_rate": 1.743212654372404e-05, "loss": 0.0621, "step": 3276 }, { "epoch": 0.77, "learning_rate": 1.7430430678154842e-05, "loss": 0.0194, "step": 3277 }, { "epoch": 0.77, "learning_rate": 1.7428734335330526e-05, "loss": 0.0809, "step": 3278 }, { "epoch": 0.77, "learning_rate": 1.742703751536005e-05, "loss": 0.0673, "step": 3279 }, { "epoch": 0.77, "learning_rate": 1.7425340218352395e-05, "loss": 0.0501, "step": 3280 }, { "epoch": 0.77, "learning_rate": 1.7423642444416583e-05, "loss": 0.0785, "step": 3281 }, { "epoch": 0.77, "learning_rate": 1.7421944193661663e-05, "loss": 0.1515, "step": 3282 }, { "epoch": 0.77, "learning_rate": 1.7420245466196708e-05, "loss": 0.0586, "step": 3283 }, { "epoch": 0.77, "learning_rate": 1.7418546262130824e-05, "loss": 0.0238, "step": 3284 }, { "epoch": 0.77, "learning_rate": 1.7416846581573162e-05, "loss": 0.0493, "step": 3285 }, { "epoch": 0.77, "learning_rate": 1.741514642463288e-05, "loss": 0.0765, "step": 3286 }, { "epoch": 0.77, "learning_rate": 1.7413445791419188e-05, "loss": 0.0801, "step": 3287 }, { "epoch": 0.77, "learning_rate": 1.7411744682041312e-05, "loss": 0.0573, "step": 3288 }, { "epoch": 0.77, "learning_rate": 1.7410043096608514e-05, "loss": 0.0833, "step": 3289 }, { "epoch": 0.77, "learning_rate": 1.740834103523009e-05, "loss": 0.0648, "step": 3290 }, { "epoch": 0.77, "learning_rate": 1.7406638498015364e-05, "loss": 0.0848, "step": 3291 }, { "epoch": 0.77, "learning_rate": 1.740493548507368e-05, "loss": 0.0225, "step": 3292 }, { "epoch": 0.77, "learning_rate": 1.7403231996514436e-05, "loss": 0.069, "step": 3293 }, { "epoch": 0.77, "learning_rate": 1.7401528032447036e-05, "loss": 0.0457, "step": 3294 }, { "epoch": 0.77, "learning_rate": 1.7399823592980928e-05, "loss": 0.04, "step": 3295 }, { "epoch": 0.77, "learning_rate": 1.7398118678225587e-05, "loss": 0.0516, "step": 3296 }, { "epoch": 0.77, "learning_rate": 1.7396413288290527e-05, "loss": 0.0085, "step": 3297 }, { "epoch": 0.77, "learning_rate": 1.7394707423285275e-05, "loss": 0.0492, "step": 3298 }, { "epoch": 0.77, "learning_rate": 1.7393001083319403e-05, "loss": 0.0791, "step": 3299 }, { "epoch": 0.77, "learning_rate": 1.739129426850251e-05, "loss": 0.028, "step": 3300 }, { "epoch": 0.77, "learning_rate": 1.738958697894422e-05, "loss": 0.0427, "step": 3301 }, { "epoch": 0.77, "learning_rate": 1.7387879214754195e-05, "loss": 0.0679, "step": 3302 }, { "epoch": 0.78, "learning_rate": 1.7386170976042124e-05, "loss": 0.0265, "step": 3303 }, { "epoch": 0.78, "learning_rate": 1.7384462262917728e-05, "loss": 0.2256, "step": 3304 }, { "epoch": 0.78, "learning_rate": 1.7382753075490755e-05, "loss": 0.0382, "step": 3305 }, { "epoch": 0.78, "learning_rate": 1.7381043413870987e-05, "loss": 0.1074, "step": 3306 }, { "epoch": 0.78, "learning_rate": 1.7379333278168234e-05, "loss": 0.0834, "step": 3307 }, { "epoch": 0.78, "learning_rate": 1.737762266849234e-05, "loss": 0.1131, "step": 3308 }, { "epoch": 0.78, "learning_rate": 1.7375911584953175e-05, "loss": 0.1039, "step": 3309 }, { "epoch": 0.78, "learning_rate": 1.737420002766064e-05, "loss": 0.081, "step": 3310 }, { "epoch": 0.78, "learning_rate": 1.7372487996724677e-05, "loss": 0.0842, "step": 3311 }, { "epoch": 0.78, "learning_rate": 1.7370775492255235e-05, "loss": 0.0374, "step": 3312 }, { "epoch": 0.78, "learning_rate": 1.736906251436232e-05, "loss": 0.0606, "step": 3313 }, { "epoch": 0.78, "learning_rate": 1.7367349063155955e-05, "loss": 0.0312, "step": 3314 }, { "epoch": 0.78, "learning_rate": 1.7365635138746186e-05, "loss": 0.0232, "step": 3315 }, { "epoch": 0.78, "learning_rate": 1.7363920741243102e-05, "loss": 0.0878, "step": 3316 }, { "epoch": 0.78, "learning_rate": 1.7362205870756827e-05, "loss": 0.1406, "step": 3317 }, { "epoch": 0.78, "learning_rate": 1.7360490527397496e-05, "loss": 0.1546, "step": 3318 }, { "epoch": 0.78, "learning_rate": 1.7358774711275287e-05, "loss": 0.087, "step": 3319 }, { "epoch": 0.78, "learning_rate": 1.7357058422500412e-05, "loss": 0.094, "step": 3320 }, { "epoch": 0.78, "learning_rate": 1.7355341661183102e-05, "loss": 0.0424, "step": 3321 }, { "epoch": 0.78, "learning_rate": 1.735362442743363e-05, "loss": 0.0265, "step": 3322 }, { "epoch": 0.78, "learning_rate": 1.7351906721362283e-05, "loss": 0.0531, "step": 3323 }, { "epoch": 0.78, "learning_rate": 1.73501885430794e-05, "loss": 0.0928, "step": 3324 }, { "epoch": 0.78, "learning_rate": 1.7348469892695338e-05, "loss": 0.0657, "step": 3325 }, { "epoch": 0.78, "learning_rate": 1.734675077032048e-05, "loss": 0.0606, "step": 3326 }, { "epoch": 0.78, "learning_rate": 1.734503117606525e-05, "loss": 0.013, "step": 3327 }, { "epoch": 0.78, "learning_rate": 1.7343311110040094e-05, "loss": 0.0218, "step": 3328 }, { "epoch": 0.78, "learning_rate": 1.7341590572355493e-05, "loss": 0.0244, "step": 3329 }, { "epoch": 0.78, "learning_rate": 1.7339869563121955e-05, "loss": 0.0187, "step": 3330 }, { "epoch": 0.78, "learning_rate": 1.733814808245002e-05, "loss": 0.063, "step": 3331 }, { "epoch": 0.78, "learning_rate": 1.7336426130450264e-05, "loss": 0.1054, "step": 3332 }, { "epoch": 0.78, "learning_rate": 1.7334703707233283e-05, "loss": 0.0623, "step": 3333 }, { "epoch": 0.78, "learning_rate": 1.7332980812909707e-05, "loss": 0.139, "step": 3334 }, { "epoch": 0.78, "learning_rate": 1.7331257447590197e-05, "loss": 0.1355, "step": 3335 }, { "epoch": 0.78, "learning_rate": 1.7329533611385448e-05, "loss": 0.0983, "step": 3336 }, { "epoch": 0.78, "learning_rate": 1.7327809304406177e-05, "loss": 0.0673, "step": 3337 }, { "epoch": 0.78, "learning_rate": 1.7326084526763142e-05, "loss": 0.01, "step": 3338 }, { "epoch": 0.78, "learning_rate": 1.7324359278567118e-05, "loss": 0.0892, "step": 3339 }, { "epoch": 0.78, "learning_rate": 1.7322633559928923e-05, "loss": 0.0512, "step": 3340 }, { "epoch": 0.78, "learning_rate": 1.73209073709594e-05, "loss": 0.0248, "step": 3341 }, { "epoch": 0.78, "learning_rate": 1.7319180711769416e-05, "loss": 0.0283, "step": 3342 }, { "epoch": 0.78, "learning_rate": 1.7317453582469876e-05, "loss": 0.0239, "step": 3343 }, { "epoch": 0.78, "learning_rate": 1.7315725983171717e-05, "loss": 0.0886, "step": 3344 }, { "epoch": 0.79, "learning_rate": 1.73139979139859e-05, "loss": 0.0211, "step": 3345 }, { "epoch": 0.79, "learning_rate": 1.731226937502342e-05, "loss": 0.0861, "step": 3346 }, { "epoch": 0.79, "learning_rate": 1.7310540366395295e-05, "loss": 0.033, "step": 3347 }, { "epoch": 0.79, "learning_rate": 1.7308810888212586e-05, "loss": 0.0973, "step": 3348 }, { "epoch": 0.79, "learning_rate": 1.7307080940586375e-05, "loss": 0.0259, "step": 3349 }, { "epoch": 0.79, "learning_rate": 1.730535052362777e-05, "loss": 0.0074, "step": 3350 }, { "epoch": 0.79, "learning_rate": 1.7303619637447926e-05, "loss": 0.1305, "step": 3351 }, { "epoch": 0.79, "learning_rate": 1.730188828215801e-05, "loss": 0.0143, "step": 3352 }, { "epoch": 0.79, "learning_rate": 1.7300156457869227e-05, "loss": 0.063, "step": 3353 }, { "epoch": 0.79, "learning_rate": 1.7298424164692816e-05, "loss": 0.1269, "step": 3354 }, { "epoch": 0.79, "learning_rate": 1.729669140274004e-05, "loss": 0.1816, "step": 3355 }, { "epoch": 0.79, "learning_rate": 1.7294958172122195e-05, "loss": 0.1264, "step": 3356 }, { "epoch": 0.79, "learning_rate": 1.72932244729506e-05, "loss": 0.0916, "step": 3357 }, { "epoch": 0.79, "learning_rate": 1.7291490305336615e-05, "loss": 0.1253, "step": 3358 }, { "epoch": 0.79, "learning_rate": 1.7289755669391627e-05, "loss": 0.0467, "step": 3359 }, { "epoch": 0.79, "learning_rate": 1.7288020565227047e-05, "loss": 0.0301, "step": 3360 }, { "epoch": 0.79, "learning_rate": 1.7286284992954322e-05, "loss": 0.0515, "step": 3361 }, { "epoch": 0.79, "learning_rate": 1.728454895268493e-05, "loss": 0.1097, "step": 3362 }, { "epoch": 0.79, "learning_rate": 1.7282812444530375e-05, "loss": 0.0742, "step": 3363 }, { "epoch": 0.79, "learning_rate": 1.7281075468602192e-05, "loss": 0.1065, "step": 3364 }, { "epoch": 0.79, "learning_rate": 1.7279338025011944e-05, "loss": 0.0278, "step": 3365 }, { "epoch": 0.79, "learning_rate": 1.727760011387123e-05, "loss": 0.0386, "step": 3366 }, { "epoch": 0.79, "learning_rate": 1.7275861735291678e-05, "loss": 0.0941, "step": 3367 }, { "epoch": 0.79, "learning_rate": 1.7274122889384938e-05, "loss": 0.0669, "step": 3368 }, { "epoch": 0.79, "learning_rate": 1.72723835762627e-05, "loss": 0.0346, "step": 3369 }, { "epoch": 0.79, "learning_rate": 1.7270643796036676e-05, "loss": 0.0469, "step": 3370 }, { "epoch": 0.79, "learning_rate": 1.7268903548818615e-05, "loss": 0.1011, "step": 3371 }, { "epoch": 0.79, "learning_rate": 1.7267162834720292e-05, "loss": 0.021, "step": 3372 }, { "epoch": 0.79, "learning_rate": 1.7265421653853517e-05, "loss": 0.0356, "step": 3373 }, { "epoch": 0.79, "learning_rate": 1.7263680006330115e-05, "loss": 0.0327, "step": 3374 }, { "epoch": 0.79, "learning_rate": 1.7261937892261958e-05, "loss": 0.0419, "step": 3375 }, { "epoch": 0.79, "learning_rate": 1.7260195311760943e-05, "loss": 0.0581, "step": 3376 }, { "epoch": 0.79, "learning_rate": 1.7258452264938997e-05, "loss": 0.0497, "step": 3377 }, { "epoch": 0.79, "learning_rate": 1.725670875190807e-05, "loss": 0.0343, "step": 3378 }, { "epoch": 0.79, "learning_rate": 1.725496477278015e-05, "loss": 0.0154, "step": 3379 }, { "epoch": 0.79, "learning_rate": 1.7253220327667253e-05, "loss": 0.0544, "step": 3380 }, { "epoch": 0.79, "learning_rate": 1.7251475416681423e-05, "loss": 0.0524, "step": 3381 }, { "epoch": 0.79, "learning_rate": 1.7249730039934738e-05, "loss": 0.0212, "step": 3382 }, { "epoch": 0.79, "learning_rate": 1.72479841975393e-05, "loss": 0.0264, "step": 3383 }, { "epoch": 0.79, "learning_rate": 1.7246237889607248e-05, "loss": 0.0494, "step": 3384 }, { "epoch": 0.79, "learning_rate": 1.724449111625074e-05, "loss": 0.0909, "step": 3385 }, { "epoch": 0.79, "learning_rate": 1.724274387758198e-05, "loss": 0.0062, "step": 3386 }, { "epoch": 0.79, "learning_rate": 1.724099617371319e-05, "loss": 0.0293, "step": 3387 }, { "epoch": 0.8, "learning_rate": 1.7239248004756616e-05, "loss": 0.0217, "step": 3388 }, { "epoch": 0.8, "learning_rate": 1.7237499370824556e-05, "loss": 0.0058, "step": 3389 }, { "epoch": 0.8, "learning_rate": 1.7235750272029315e-05, "loss": 0.0926, "step": 3390 }, { "epoch": 0.8, "learning_rate": 1.723400070848324e-05, "loss": 0.0088, "step": 3391 }, { "epoch": 0.8, "learning_rate": 1.7232250680298707e-05, "loss": 0.0934, "step": 3392 }, { "epoch": 0.8, "learning_rate": 1.7230500187588118e-05, "loss": 0.2083, "step": 3393 }, { "epoch": 0.8, "learning_rate": 1.7228749230463906e-05, "loss": 0.0558, "step": 3394 }, { "epoch": 0.8, "learning_rate": 1.7226997809038537e-05, "loss": 0.0379, "step": 3395 }, { "epoch": 0.8, "learning_rate": 1.72252459234245e-05, "loss": 0.1257, "step": 3396 }, { "epoch": 0.8, "learning_rate": 1.7223493573734327e-05, "loss": 0.0994, "step": 3397 }, { "epoch": 0.8, "learning_rate": 1.7221740760080564e-05, "loss": 0.0266, "step": 3398 }, { "epoch": 0.8, "learning_rate": 1.7219987482575794e-05, "loss": 0.0218, "step": 3399 }, { "epoch": 0.8, "learning_rate": 1.7218233741332628e-05, "loss": 0.0323, "step": 3400 }, { "epoch": 0.8, "learning_rate": 1.7216479536463716e-05, "loss": 0.0751, "step": 3401 }, { "epoch": 0.8, "learning_rate": 1.7214724868081725e-05, "loss": 0.0391, "step": 3402 }, { "epoch": 0.8, "learning_rate": 1.721296973629936e-05, "loss": 0.0632, "step": 3403 }, { "epoch": 0.8, "learning_rate": 1.7211214141229348e-05, "loss": 0.0377, "step": 3404 }, { "epoch": 0.8, "learning_rate": 1.720945808298445e-05, "loss": 0.0311, "step": 3405 }, { "epoch": 0.8, "learning_rate": 1.720770156167747e-05, "loss": 0.0488, "step": 3406 }, { "epoch": 0.8, "learning_rate": 1.720594457742121e-05, "loss": 0.0366, "step": 3407 }, { "epoch": 0.8, "learning_rate": 1.7204187130328533e-05, "loss": 0.049, "step": 3408 }, { "epoch": 0.8, "learning_rate": 1.7202429220512318e-05, "loss": 0.0367, "step": 3409 }, { "epoch": 0.8, "learning_rate": 1.7200670848085478e-05, "loss": 0.0351, "step": 3410 }, { "epoch": 0.8, "learning_rate": 1.7198912013160942e-05, "loss": 0.1233, "step": 3411 }, { "epoch": 0.8, "learning_rate": 1.719715271585169e-05, "loss": 0.022, "step": 3412 }, { "epoch": 0.8, "learning_rate": 1.7195392956270717e-05, "loss": 0.026, "step": 3413 }, { "epoch": 0.8, "learning_rate": 1.7193632734531053e-05, "loss": 0.0413, "step": 3414 }, { "epoch": 0.8, "learning_rate": 1.7191872050745755e-05, "loss": 0.0487, "step": 3415 }, { "epoch": 0.8, "learning_rate": 1.719011090502791e-05, "loss": 0.0553, "step": 3416 }, { "epoch": 0.8, "learning_rate": 1.7188349297490646e-05, "loss": 0.0503, "step": 3417 }, { "epoch": 0.8, "learning_rate": 1.71865872282471e-05, "loss": 0.0572, "step": 3418 }, { "epoch": 0.8, "learning_rate": 1.7184824697410453e-05, "loss": 0.1338, "step": 3419 }, { "epoch": 0.8, "learning_rate": 1.7183061705093913e-05, "loss": 0.0473, "step": 3420 }, { "epoch": 0.8, "learning_rate": 1.7181298251410714e-05, "loss": 0.0685, "step": 3421 }, { "epoch": 0.8, "learning_rate": 1.7179534336474128e-05, "loss": 0.0303, "step": 3422 }, { "epoch": 0.8, "learning_rate": 1.7177769960397443e-05, "loss": 0.1511, "step": 3423 }, { "epoch": 0.8, "learning_rate": 1.717600512329399e-05, "loss": 0.1087, "step": 3424 }, { "epoch": 0.8, "learning_rate": 1.7174239825277125e-05, "loss": 0.0281, "step": 3425 }, { "epoch": 0.8, "learning_rate": 1.7172474066460222e-05, "loss": 0.1087, "step": 3426 }, { "epoch": 0.8, "learning_rate": 1.717070784695671e-05, "loss": 0.0204, "step": 3427 }, { "epoch": 0.8, "learning_rate": 1.716894116688003e-05, "loss": 0.0809, "step": 3428 }, { "epoch": 0.8, "learning_rate": 1.7167174026343646e-05, "loss": 0.0311, "step": 3429 }, { "epoch": 0.8, "learning_rate": 1.7165406425461068e-05, "loss": 0.0309, "step": 3430 }, { "epoch": 0.81, "learning_rate": 1.716363836434583e-05, "loss": 0.0355, "step": 3431 }, { "epoch": 0.81, "learning_rate": 1.716186984311149e-05, "loss": 0.0188, "step": 3432 }, { "epoch": 0.81, "learning_rate": 1.7160100861871645e-05, "loss": 0.0487, "step": 3433 }, { "epoch": 0.81, "learning_rate": 1.715833142073991e-05, "loss": 0.0782, "step": 3434 }, { "epoch": 0.81, "learning_rate": 1.7156561519829938e-05, "loss": 0.0248, "step": 3435 }, { "epoch": 0.81, "learning_rate": 1.7154791159255414e-05, "loss": 0.0608, "step": 3436 }, { "epoch": 0.81, "learning_rate": 1.7153020339130042e-05, "loss": 0.0261, "step": 3437 }, { "epoch": 0.81, "learning_rate": 1.715124905956756e-05, "loss": 0.0383, "step": 3438 }, { "epoch": 0.81, "learning_rate": 1.714947732068175e-05, "loss": 0.1027, "step": 3439 }, { "epoch": 0.81, "learning_rate": 1.7147705122586398e-05, "loss": 0.0547, "step": 3440 }, { "epoch": 0.81, "learning_rate": 1.7145932465395333e-05, "loss": 0.1365, "step": 3441 }, { "epoch": 0.81, "learning_rate": 1.7144159349222413e-05, "loss": 0.067, "step": 3442 }, { "epoch": 0.81, "learning_rate": 1.714238577418153e-05, "loss": 0.0541, "step": 3443 }, { "epoch": 0.81, "learning_rate": 1.7140611740386597e-05, "loss": 0.0787, "step": 3444 }, { "epoch": 0.81, "learning_rate": 1.7138837247951555e-05, "loss": 0.0681, "step": 3445 }, { "epoch": 0.81, "learning_rate": 1.7137062296990387e-05, "loss": 0.0614, "step": 3446 }, { "epoch": 0.81, "learning_rate": 1.7135286887617098e-05, "loss": 0.0357, "step": 3447 }, { "epoch": 0.81, "learning_rate": 1.7133511019945713e-05, "loss": 0.0239, "step": 3448 }, { "epoch": 0.81, "learning_rate": 1.7131734694090304e-05, "loss": 0.1091, "step": 3449 }, { "epoch": 0.81, "learning_rate": 1.712995791016496e-05, "loss": 0.0529, "step": 3450 }, { "epoch": 0.81, "learning_rate": 1.712818066828381e-05, "loss": 0.0455, "step": 3451 }, { "epoch": 0.81, "learning_rate": 1.7126402968560997e-05, "loss": 0.0821, "step": 3452 }, { "epoch": 0.81, "learning_rate": 1.7124624811110708e-05, "loss": 0.103, "step": 3453 }, { "epoch": 0.81, "learning_rate": 1.7122846196047152e-05, "loss": 0.0907, "step": 3454 }, { "epoch": 0.81, "learning_rate": 1.7121067123484567e-05, "loss": 0.0266, "step": 3455 }, { "epoch": 0.81, "learning_rate": 1.7119287593537225e-05, "loss": 0.082, "step": 3456 }, { "epoch": 0.81, "learning_rate": 1.7117507606319427e-05, "loss": 0.0492, "step": 3457 }, { "epoch": 0.81, "learning_rate": 1.71157271619455e-05, "loss": 0.0354, "step": 3458 }, { "epoch": 0.81, "learning_rate": 1.7113946260529796e-05, "loss": 0.0849, "step": 3459 }, { "epoch": 0.81, "learning_rate": 1.7112164902186705e-05, "loss": 0.0202, "step": 3460 }, { "epoch": 0.81, "learning_rate": 1.7110383087030648e-05, "loss": 0.0297, "step": 3461 }, { "epoch": 0.81, "learning_rate": 1.710860081517607e-05, "loss": 0.0413, "step": 3462 }, { "epoch": 0.81, "learning_rate": 1.7106818086737437e-05, "loss": 0.075, "step": 3463 }, { "epoch": 0.81, "learning_rate": 1.7105034901829263e-05, "loss": 0.0968, "step": 3464 }, { "epoch": 0.81, "learning_rate": 1.7103251260566076e-05, "loss": 0.0384, "step": 3465 }, { "epoch": 0.81, "learning_rate": 1.7101467163062443e-05, "loss": 0.0181, "step": 3466 }, { "epoch": 0.81, "learning_rate": 1.7099682609432954e-05, "loss": 0.0485, "step": 3467 }, { "epoch": 0.81, "learning_rate": 1.709789759979223e-05, "loss": 0.0731, "step": 3468 }, { "epoch": 0.81, "learning_rate": 1.7096112134254922e-05, "loss": 0.0814, "step": 3469 }, { "epoch": 0.81, "learning_rate": 1.7094326212935715e-05, "loss": 0.1483, "step": 3470 }, { "epoch": 0.81, "learning_rate": 1.709253983594931e-05, "loss": 0.0383, "step": 3471 }, { "epoch": 0.81, "learning_rate": 1.709075300341045e-05, "loss": 0.1517, "step": 3472 }, { "epoch": 0.82, "learning_rate": 1.7088965715433903e-05, "loss": 0.0573, "step": 3473 }, { "epoch": 0.82, "learning_rate": 1.7087177972134467e-05, "loss": 0.0774, "step": 3474 }, { "epoch": 0.82, "learning_rate": 1.7085389773626965e-05, "loss": 0.075, "step": 3475 }, { "epoch": 0.82, "learning_rate": 1.7083601120026256e-05, "loss": 0.0929, "step": 3476 }, { "epoch": 0.82, "learning_rate": 1.7081812011447226e-05, "loss": 0.0567, "step": 3477 }, { "epoch": 0.82, "learning_rate": 1.708002244800478e-05, "loss": 0.0457, "step": 3478 }, { "epoch": 0.82, "learning_rate": 1.7078232429813875e-05, "loss": 0.0565, "step": 3479 }, { "epoch": 0.82, "learning_rate": 1.7076441956989476e-05, "loss": 0.0489, "step": 3480 }, { "epoch": 0.82, "learning_rate": 1.7074651029646584e-05, "loss": 0.0288, "step": 3481 }, { "epoch": 0.82, "learning_rate": 1.707285964790023e-05, "loss": 0.0604, "step": 3482 }, { "epoch": 0.82, "learning_rate": 1.7071067811865477e-05, "loss": 0.0674, "step": 3483 }, { "epoch": 0.82, "learning_rate": 1.7069275521657412e-05, "loss": 0.092, "step": 3484 }, { "epoch": 0.82, "learning_rate": 1.7067482777391155e-05, "loss": 0.0359, "step": 3485 }, { "epoch": 0.82, "learning_rate": 1.7065689579181852e-05, "loss": 0.0294, "step": 3486 }, { "epoch": 0.82, "learning_rate": 1.706389592714468e-05, "loss": 0.0512, "step": 3487 }, { "epoch": 0.82, "learning_rate": 1.7062101821394845e-05, "loss": 0.0183, "step": 3488 }, { "epoch": 0.82, "learning_rate": 1.7060307262047585e-05, "loss": 0.1013, "step": 3489 }, { "epoch": 0.82, "learning_rate": 1.705851224921816e-05, "loss": 0.0315, "step": 3490 }, { "epoch": 0.82, "learning_rate": 1.7056716783021864e-05, "loss": 0.0578, "step": 3491 }, { "epoch": 0.82, "learning_rate": 1.7054920863574023e-05, "loss": 0.066, "step": 3492 }, { "epoch": 0.82, "learning_rate": 1.7053124490989987e-05, "loss": 0.0725, "step": 3493 }, { "epoch": 0.82, "learning_rate": 1.705132766538513e-05, "loss": 0.0471, "step": 3494 }, { "epoch": 0.82, "learning_rate": 1.7049530386874874e-05, "loss": 0.0777, "step": 3495 }, { "epoch": 0.82, "learning_rate": 1.7047732655574645e-05, "loss": 0.0406, "step": 3496 }, { "epoch": 0.82, "learning_rate": 1.7045934471599924e-05, "loss": 0.0417, "step": 3497 }, { "epoch": 0.82, "learning_rate": 1.7044135835066195e-05, "loss": 0.0258, "step": 3498 }, { "epoch": 0.82, "learning_rate": 1.7042336746088995e-05, "loss": 0.0535, "step": 3499 }, { "epoch": 0.82, "learning_rate": 1.704053720478387e-05, "loss": 0.0995, "step": 3500 }, { "epoch": 0.82, "learning_rate": 1.7038737211266413e-05, "loss": 0.0129, "step": 3501 }, { "epoch": 0.82, "learning_rate": 1.703693676565223e-05, "loss": 0.0469, "step": 3502 }, { "epoch": 0.82, "learning_rate": 1.703513586805697e-05, "loss": 0.1027, "step": 3503 }, { "epoch": 0.82, "learning_rate": 1.7033334518596296e-05, "loss": 0.0399, "step": 3504 }, { "epoch": 0.82, "learning_rate": 1.7031532717385914e-05, "loss": 0.0143, "step": 3505 }, { "epoch": 0.82, "learning_rate": 1.702973046454155e-05, "loss": 0.0408, "step": 3506 }, { "epoch": 0.82, "learning_rate": 1.702792776017897e-05, "loss": 0.0642, "step": 3507 }, { "epoch": 0.82, "learning_rate": 1.702612460441395e-05, "loss": 0.0864, "step": 3508 }, { "epoch": 0.82, "learning_rate": 1.702432099736232e-05, "loss": 0.0781, "step": 3509 }, { "epoch": 0.82, "learning_rate": 1.702251693913991e-05, "loss": 0.1226, "step": 3510 }, { "epoch": 0.82, "learning_rate": 1.70207124298626e-05, "loss": 0.0973, "step": 3511 }, { "epoch": 0.82, "learning_rate": 1.7018907469646304e-05, "loss": 0.0362, "step": 3512 }, { "epoch": 0.82, "learning_rate": 1.701710205860694e-05, "loss": 0.0659, "step": 3513 }, { "epoch": 0.82, "learning_rate": 1.7015296196860474e-05, "loss": 0.0754, "step": 3514 }, { "epoch": 0.82, "learning_rate": 1.70134898845229e-05, "loss": 0.0159, "step": 3515 }, { "epoch": 0.83, "learning_rate": 1.701168312171023e-05, "loss": 0.0335, "step": 3516 }, { "epoch": 0.83, "learning_rate": 1.7009875908538516e-05, "loss": 0.0163, "step": 3517 }, { "epoch": 0.83, "learning_rate": 1.700806824512384e-05, "loss": 0.0969, "step": 3518 }, { "epoch": 0.83, "learning_rate": 1.7006260131582297e-05, "loss": 0.0309, "step": 3519 }, { "epoch": 0.83, "learning_rate": 1.700445156803003e-05, "loss": 0.0763, "step": 3520 }, { "epoch": 0.83, "learning_rate": 1.7002642554583198e-05, "loss": 0.0198, "step": 3521 }, { "epoch": 0.83, "learning_rate": 1.7000833091357994e-05, "loss": 0.0516, "step": 3522 }, { "epoch": 0.83, "learning_rate": 1.6999023178470646e-05, "loss": 0.0319, "step": 3523 }, { "epoch": 0.83, "learning_rate": 1.69972128160374e-05, "loss": 0.1019, "step": 3524 }, { "epoch": 0.83, "learning_rate": 1.699540200417453e-05, "loss": 0.0234, "step": 3525 }, { "epoch": 0.83, "learning_rate": 1.699359074299835e-05, "loss": 0.0121, "step": 3526 }, { "epoch": 0.83, "learning_rate": 1.6991779032625197e-05, "loss": 0.0579, "step": 3527 }, { "epoch": 0.83, "learning_rate": 1.6989966873171434e-05, "loss": 0.0412, "step": 3528 }, { "epoch": 0.83, "learning_rate": 1.698815426475346e-05, "loss": 0.1307, "step": 3529 }, { "epoch": 0.83, "learning_rate": 1.6986341207487696e-05, "loss": 0.0226, "step": 3530 }, { "epoch": 0.83, "learning_rate": 1.698452770149059e-05, "loss": 0.0749, "step": 3531 }, { "epoch": 0.83, "learning_rate": 1.698271374687863e-05, "loss": 0.0684, "step": 3532 }, { "epoch": 0.83, "learning_rate": 1.698089934376832e-05, "loss": 0.0654, "step": 3533 }, { "epoch": 0.83, "learning_rate": 1.6979084492276207e-05, "loss": 0.0455, "step": 3534 }, { "epoch": 0.83, "learning_rate": 1.6977269192518852e-05, "loss": 0.0793, "step": 3535 }, { "epoch": 0.83, "learning_rate": 1.697545344461285e-05, "loss": 0.0663, "step": 3536 }, { "epoch": 0.83, "learning_rate": 1.6973637248674834e-05, "loss": 0.0251, "step": 3537 }, { "epoch": 0.83, "learning_rate": 1.6971820604821447e-05, "loss": 0.0961, "step": 3538 }, { "epoch": 0.83, "learning_rate": 1.6970003513169378e-05, "loss": 0.074, "step": 3539 }, { "epoch": 0.83, "learning_rate": 1.696818597383534e-05, "loss": 0.0581, "step": 3540 }, { "epoch": 0.83, "learning_rate": 1.6966367986936072e-05, "loss": 0.0966, "step": 3541 }, { "epoch": 0.83, "learning_rate": 1.696454955258834e-05, "loss": 0.0511, "step": 3542 }, { "epoch": 0.83, "learning_rate": 1.6962730670908944e-05, "loss": 0.0551, "step": 3543 }, { "epoch": 0.83, "learning_rate": 1.696091134201471e-05, "loss": 0.0701, "step": 3544 }, { "epoch": 0.83, "learning_rate": 1.6959091566022496e-05, "loss": 0.0645, "step": 3545 }, { "epoch": 0.83, "learning_rate": 1.6957271343049182e-05, "loss": 0.0325, "step": 3546 }, { "epoch": 0.83, "learning_rate": 1.6955450673211684e-05, "loss": 0.0699, "step": 3547 }, { "epoch": 0.83, "learning_rate": 1.6953629556626935e-05, "loss": 0.0496, "step": 3548 }, { "epoch": 0.83, "learning_rate": 1.6951807993411914e-05, "loss": 0.0728, "step": 3549 }, { "epoch": 0.83, "learning_rate": 1.6949985983683618e-05, "loss": 0.0582, "step": 3550 }, { "epoch": 0.83, "learning_rate": 1.6948163527559072e-05, "loss": 0.097, "step": 3551 }, { "epoch": 0.83, "learning_rate": 1.694634062515533e-05, "loss": 0.0353, "step": 3552 }, { "epoch": 0.83, "learning_rate": 1.694451727658948e-05, "loss": 0.0469, "step": 3553 }, { "epoch": 0.83, "learning_rate": 1.6942693481978644e-05, "loss": 0.029, "step": 3554 }, { "epoch": 0.83, "learning_rate": 1.6940869241439946e-05, "loss": 0.0808, "step": 3555 }, { "epoch": 0.83, "learning_rate": 1.6939044555090565e-05, "loss": 0.1016, "step": 3556 }, { "epoch": 0.83, "learning_rate": 1.6937219423047705e-05, "loss": 0.0882, "step": 3557 }, { "epoch": 0.84, "learning_rate": 1.693539384542859e-05, "loss": 0.0584, "step": 3558 }, { "epoch": 0.84, "learning_rate": 1.6933567822350475e-05, "loss": 0.034, "step": 3559 }, { "epoch": 0.84, "learning_rate": 1.6931741353930645e-05, "loss": 0.0489, "step": 3560 }, { "epoch": 0.84, "learning_rate": 1.692991444028642e-05, "loss": 0.0345, "step": 3561 }, { "epoch": 0.84, "learning_rate": 1.6928087081535126e-05, "loss": 0.0875, "step": 3562 }, { "epoch": 0.84, "learning_rate": 1.6926259277794154e-05, "loss": 0.0671, "step": 3563 }, { "epoch": 0.84, "learning_rate": 1.6924431029180893e-05, "loss": 0.0399, "step": 3564 }, { "epoch": 0.84, "learning_rate": 1.6922602335812773e-05, "loss": 0.0653, "step": 3565 }, { "epoch": 0.84, "learning_rate": 1.692077319780725e-05, "loss": 0.0422, "step": 3566 }, { "epoch": 0.84, "learning_rate": 1.6918943615281808e-05, "loss": 0.0452, "step": 3567 }, { "epoch": 0.84, "learning_rate": 1.6917113588353967e-05, "loss": 0.0452, "step": 3568 }, { "epoch": 0.84, "learning_rate": 1.6915283117141262e-05, "loss": 0.0243, "step": 3569 }, { "epoch": 0.84, "learning_rate": 1.6913452201761265e-05, "loss": 0.0139, "step": 3570 }, { "epoch": 0.84, "learning_rate": 1.6911620842331576e-05, "loss": 0.1598, "step": 3571 }, { "epoch": 0.84, "learning_rate": 1.6909789038969826e-05, "loss": 0.0187, "step": 3572 }, { "epoch": 0.84, "learning_rate": 1.6907956791793668e-05, "loss": 0.0256, "step": 3573 }, { "epoch": 0.84, "learning_rate": 1.6906124100920787e-05, "loss": 0.1007, "step": 3574 }, { "epoch": 0.84, "learning_rate": 1.69042909664689e-05, "loss": 0.0661, "step": 3575 }, { "epoch": 0.84, "learning_rate": 1.6902457388555743e-05, "loss": 0.042, "step": 3576 }, { "epoch": 0.84, "learning_rate": 1.690062336729909e-05, "loss": 0.0668, "step": 3577 }, { "epoch": 0.84, "learning_rate": 1.6898788902816737e-05, "loss": 0.0713, "step": 3578 }, { "epoch": 0.84, "learning_rate": 1.689695399522652e-05, "loss": 0.0333, "step": 3579 }, { "epoch": 0.84, "learning_rate": 1.6895118644646282e-05, "loss": 0.1085, "step": 3580 }, { "epoch": 0.84, "learning_rate": 1.6893282851193916e-05, "loss": 0.0946, "step": 3581 }, { "epoch": 0.84, "learning_rate": 1.689144661498733e-05, "loss": 0.0348, "step": 3582 }, { "epoch": 0.84, "learning_rate": 1.6889609936144472e-05, "loss": 0.0806, "step": 3583 }, { "epoch": 0.84, "learning_rate": 1.6887772814783304e-05, "loss": 0.0892, "step": 3584 }, { "epoch": 0.84, "learning_rate": 1.6885935251021825e-05, "loss": 0.0206, "step": 3585 }, { "epoch": 0.84, "learning_rate": 1.6884097244978068e-05, "loss": 0.0327, "step": 3586 }, { "epoch": 0.84, "learning_rate": 1.6882258796770078e-05, "loss": 0.0333, "step": 3587 }, { "epoch": 0.84, "learning_rate": 1.6880419906515945e-05, "loss": 0.0449, "step": 3588 }, { "epoch": 0.84, "learning_rate": 1.687858057433378e-05, "loss": 0.0376, "step": 3589 }, { "epoch": 0.84, "learning_rate": 1.6876740800341716e-05, "loss": 0.0931, "step": 3590 }, { "epoch": 0.84, "learning_rate": 1.6874900584657932e-05, "loss": 0.0263, "step": 3591 }, { "epoch": 0.84, "learning_rate": 1.6873059927400616e-05, "loss": 0.0786, "step": 3592 }, { "epoch": 0.84, "learning_rate": 1.6871218828688e-05, "loss": 0.0384, "step": 3593 }, { "epoch": 0.84, "learning_rate": 1.6869377288638334e-05, "loss": 0.0869, "step": 3594 }, { "epoch": 0.84, "learning_rate": 1.68675353073699e-05, "loss": 0.0885, "step": 3595 }, { "epoch": 0.84, "learning_rate": 1.6865692885001008e-05, "loss": 0.0263, "step": 3596 }, { "epoch": 0.84, "learning_rate": 1.6863850021649996e-05, "loss": 0.0845, "step": 3597 }, { "epoch": 0.84, "learning_rate": 1.6862006717435228e-05, "loss": 0.0579, "step": 3598 }, { "epoch": 0.84, "learning_rate": 1.6860162972475102e-05, "loss": 0.0284, "step": 3599 }, { "epoch": 0.84, "learning_rate": 1.6858318786888045e-05, "loss": 0.0607, "step": 3600 }, { "epoch": 0.85, "learning_rate": 1.68564741607925e-05, "loss": 0.0245, "step": 3601 }, { "epoch": 0.85, "learning_rate": 1.685462909430696e-05, "loss": 0.0729, "step": 3602 }, { "epoch": 0.85, "learning_rate": 1.685278358754992e-05, "loss": 0.0492, "step": 3603 }, { "epoch": 0.85, "learning_rate": 1.6850937640639925e-05, "loss": 0.0753, "step": 3604 }, { "epoch": 0.85, "learning_rate": 1.6849091253695532e-05, "loss": 0.0373, "step": 3605 }, { "epoch": 0.85, "learning_rate": 1.6847244426835346e-05, "loss": 0.0564, "step": 3606 }, { "epoch": 0.85, "learning_rate": 1.6845397160177977e-05, "loss": 0.0424, "step": 3607 }, { "epoch": 0.85, "learning_rate": 1.684354945384208e-05, "loss": 0.0333, "step": 3608 }, { "epoch": 0.85, "learning_rate": 1.684170130794633e-05, "loss": 0.007, "step": 3609 }, { "epoch": 0.85, "learning_rate": 1.6839852722609435e-05, "loss": 0.0492, "step": 3610 }, { "epoch": 0.85, "learning_rate": 1.6838003697950126e-05, "loss": 0.0754, "step": 3611 }, { "epoch": 0.85, "learning_rate": 1.6836154234087175e-05, "loss": 0.0485, "step": 3612 }, { "epoch": 0.85, "learning_rate": 1.6834304331139362e-05, "loss": 0.0156, "step": 3613 }, { "epoch": 0.85, "learning_rate": 1.683245398922551e-05, "loss": 0.1925, "step": 3614 }, { "epoch": 0.85, "learning_rate": 1.6830603208464467e-05, "loss": 0.0599, "step": 3615 }, { "epoch": 0.85, "learning_rate": 1.6828751988975106e-05, "loss": 0.0578, "step": 3616 }, { "epoch": 0.85, "learning_rate": 1.6826900330876338e-05, "loss": 0.0124, "step": 3617 }, { "epoch": 0.85, "learning_rate": 1.682504823428708e-05, "loss": 0.0546, "step": 3618 }, { "epoch": 0.85, "learning_rate": 1.6823195699326307e-05, "loss": 0.0574, "step": 3619 }, { "epoch": 0.85, "learning_rate": 1.6821342726112996e-05, "loss": 0.019, "step": 3620 }, { "epoch": 0.85, "learning_rate": 1.6819489314766168e-05, "loss": 0.0132, "step": 3621 }, { "epoch": 0.85, "learning_rate": 1.681763546540487e-05, "loss": 0.0371, "step": 3622 }, { "epoch": 0.85, "learning_rate": 1.6815781178148167e-05, "loss": 0.0667, "step": 3623 }, { "epoch": 0.85, "learning_rate": 1.6813926453115165e-05, "loss": 0.0769, "step": 3624 }, { "epoch": 0.85, "learning_rate": 1.681207129042499e-05, "loss": 0.0224, "step": 3625 }, { "epoch": 0.85, "learning_rate": 1.6810215690196803e-05, "loss": 0.0698, "step": 3626 }, { "epoch": 0.85, "learning_rate": 1.6808359652549782e-05, "loss": 0.0258, "step": 3627 }, { "epoch": 0.85, "learning_rate": 1.6806503177603145e-05, "loss": 0.084, "step": 3628 }, { "epoch": 0.85, "learning_rate": 1.6804646265476135e-05, "loss": 0.0954, "step": 3629 }, { "epoch": 0.85, "learning_rate": 1.6802788916288016e-05, "loss": 0.0554, "step": 3630 }, { "epoch": 0.85, "learning_rate": 1.6800931130158086e-05, "loss": 0.0369, "step": 3631 }, { "epoch": 0.85, "learning_rate": 1.679907290720567e-05, "loss": 0.0186, "step": 3632 }, { "epoch": 0.85, "learning_rate": 1.6797214247550123e-05, "loss": 0.042, "step": 3633 }, { "epoch": 0.85, "learning_rate": 1.6795355151310827e-05, "loss": 0.0251, "step": 3634 }, { "epoch": 0.85, "learning_rate": 1.6793495618607193e-05, "loss": 0.087, "step": 3635 }, { "epoch": 0.85, "learning_rate": 1.6791635649558653e-05, "loss": 0.0638, "step": 3636 }, { "epoch": 0.85, "learning_rate": 1.6789775244284673e-05, "loss": 0.0378, "step": 3637 }, { "epoch": 0.85, "learning_rate": 1.6787914402904754e-05, "loss": 0.0952, "step": 3638 }, { "epoch": 0.85, "learning_rate": 1.6786053125538407e-05, "loss": 0.0647, "step": 3639 }, { "epoch": 0.85, "learning_rate": 1.678419141230519e-05, "loss": 0.0328, "step": 3640 }, { "epoch": 0.85, "learning_rate": 1.6782329263324674e-05, "loss": 0.0732, "step": 3641 }, { "epoch": 0.85, "learning_rate": 1.6780466678716466e-05, "loss": 0.0406, "step": 3642 }, { "epoch": 0.85, "learning_rate": 1.6778603658600206e-05, "loss": 0.0371, "step": 3643 }, { "epoch": 0.86, "learning_rate": 1.6776740203095547e-05, "loss": 0.0613, "step": 3644 }, { "epoch": 0.86, "learning_rate": 1.6774876312322185e-05, "loss": 0.0483, "step": 3645 }, { "epoch": 0.86, "learning_rate": 1.6773011986399832e-05, "loss": 0.1323, "step": 3646 }, { "epoch": 0.86, "learning_rate": 1.6771147225448235e-05, "loss": 0.0566, "step": 3647 }, { "epoch": 0.86, "learning_rate": 1.676928202958717e-05, "loss": 0.0804, "step": 3648 }, { "epoch": 0.86, "learning_rate": 1.6767416398936433e-05, "loss": 0.054, "step": 3649 }, { "epoch": 0.86, "learning_rate": 1.6765550333615857e-05, "loss": 0.1077, "step": 3650 }, { "epoch": 0.86, "learning_rate": 1.6763683833745298e-05, "loss": 0.0434, "step": 3651 }, { "epoch": 0.86, "learning_rate": 1.6761816899444642e-05, "loss": 0.0458, "step": 3652 }, { "epoch": 0.86, "learning_rate": 1.67599495308338e-05, "loss": 0.0357, "step": 3653 }, { "epoch": 0.86, "learning_rate": 1.6758081728032712e-05, "loss": 0.0717, "step": 3654 }, { "epoch": 0.86, "learning_rate": 1.6756213491161348e-05, "loss": 0.0381, "step": 3655 }, { "epoch": 0.86, "learning_rate": 1.6754344820339707e-05, "loss": 0.0518, "step": 3656 }, { "epoch": 0.86, "learning_rate": 1.675247571568781e-05, "loss": 0.0523, "step": 3657 }, { "epoch": 0.86, "learning_rate": 1.675060617732571e-05, "loss": 0.0641, "step": 3658 }, { "epoch": 0.86, "learning_rate": 1.6748736205373485e-05, "loss": 0.0313, "step": 3659 }, { "epoch": 0.86, "learning_rate": 1.674686579995125e-05, "loss": 0.0154, "step": 3660 }, { "epoch": 0.86, "learning_rate": 1.674499496117913e-05, "loss": 0.0359, "step": 3661 }, { "epoch": 0.86, "learning_rate": 1.6743123689177297e-05, "loss": 0.0296, "step": 3662 }, { "epoch": 0.86, "learning_rate": 1.6741251984065942e-05, "loss": 0.0298, "step": 3663 }, { "epoch": 0.86, "learning_rate": 1.673937984596528e-05, "loss": 0.0707, "step": 3664 }, { "epoch": 0.86, "learning_rate": 1.6737507274995558e-05, "loss": 0.0893, "step": 3665 }, { "epoch": 0.86, "learning_rate": 1.6735634271277056e-05, "loss": 0.0117, "step": 3666 }, { "epoch": 0.86, "learning_rate": 1.6733760834930076e-05, "loss": 0.0595, "step": 3667 }, { "epoch": 0.86, "learning_rate": 1.673188696607494e-05, "loss": 0.0476, "step": 3668 }, { "epoch": 0.86, "learning_rate": 1.673001266483202e-05, "loss": 0.0836, "step": 3669 }, { "epoch": 0.86, "learning_rate": 1.6728137931321692e-05, "loss": 0.0518, "step": 3670 }, { "epoch": 0.86, "learning_rate": 1.672626276566437e-05, "loss": 0.0454, "step": 3671 }, { "epoch": 0.86, "learning_rate": 1.6724387167980497e-05, "loss": 0.0606, "step": 3672 }, { "epoch": 0.86, "learning_rate": 1.6722511138390545e-05, "loss": 0.0341, "step": 3673 }, { "epoch": 0.86, "learning_rate": 1.672063467701501e-05, "loss": 0.0245, "step": 3674 }, { "epoch": 0.86, "learning_rate": 1.6718757783974413e-05, "loss": 0.0253, "step": 3675 }, { "epoch": 0.86, "learning_rate": 1.6716880459389313e-05, "loss": 0.0419, "step": 3676 }, { "epoch": 0.86, "learning_rate": 1.6715002703380287e-05, "loss": 0.0764, "step": 3677 }, { "epoch": 0.86, "learning_rate": 1.671312451606794e-05, "loss": 0.0514, "step": 3678 }, { "epoch": 0.86, "learning_rate": 1.6711245897572906e-05, "loss": 0.018, "step": 3679 }, { "epoch": 0.86, "learning_rate": 1.6709366848015858e-05, "loss": 0.0179, "step": 3680 }, { "epoch": 0.86, "learning_rate": 1.670748736751748e-05, "loss": 0.0771, "step": 3681 }, { "epoch": 0.86, "learning_rate": 1.6705607456198492e-05, "loss": 0.0348, "step": 3682 }, { "epoch": 0.86, "learning_rate": 1.6703727114179636e-05, "loss": 0.0493, "step": 3683 }, { "epoch": 0.86, "learning_rate": 1.6701846341581695e-05, "loss": 0.1377, "step": 3684 }, { "epoch": 0.86, "learning_rate": 1.6699965138525466e-05, "loss": 0.0256, "step": 3685 }, { "epoch": 0.87, "learning_rate": 1.669808350513178e-05, "loss": 0.0386, "step": 3686 }, { "epoch": 0.87, "learning_rate": 1.6696201441521488e-05, "loss": 0.0439, "step": 3687 }, { "epoch": 0.87, "learning_rate": 1.6694318947815478e-05, "loss": 0.0882, "step": 3688 }, { "epoch": 0.87, "learning_rate": 1.6692436024134667e-05, "loss": 0.053, "step": 3689 }, { "epoch": 0.87, "learning_rate": 1.669055267059999e-05, "loss": 0.0762, "step": 3690 }, { "epoch": 0.87, "learning_rate": 1.6688668887332414e-05, "loss": 0.023, "step": 3691 }, { "epoch": 0.87, "learning_rate": 1.6686784674452936e-05, "loss": 0.0693, "step": 3692 }, { "epoch": 0.87, "learning_rate": 1.668490003208258e-05, "loss": 0.071, "step": 3693 }, { "epoch": 0.87, "learning_rate": 1.668301496034239e-05, "loss": 0.0788, "step": 3694 }, { "epoch": 0.87, "learning_rate": 1.668112945935345e-05, "loss": 0.0584, "step": 3695 }, { "epoch": 0.87, "learning_rate": 1.667924352923686e-05, "loss": 0.0306, "step": 3696 }, { "epoch": 0.87, "learning_rate": 1.667735717011376e-05, "loss": 0.0396, "step": 3697 }, { "epoch": 0.87, "learning_rate": 1.667547038210531e-05, "loss": 0.0124, "step": 3698 }, { "epoch": 0.87, "learning_rate": 1.667358316533269e-05, "loss": 0.0513, "step": 3699 }, { "epoch": 0.87, "learning_rate": 1.667169551991712e-05, "loss": 0.0521, "step": 3700 }, { "epoch": 0.87, "learning_rate": 1.6669807445979845e-05, "loss": 0.1071, "step": 3701 }, { "epoch": 0.87, "learning_rate": 1.666791894364213e-05, "loss": 0.0424, "step": 3702 }, { "epoch": 0.87, "learning_rate": 1.6666030013025284e-05, "loss": 0.1395, "step": 3703 }, { "epoch": 0.87, "learning_rate": 1.666414065425062e-05, "loss": 0.1077, "step": 3704 }, { "epoch": 0.87, "learning_rate": 1.6662250867439497e-05, "loss": 0.0427, "step": 3705 }, { "epoch": 0.87, "learning_rate": 1.6660360652713297e-05, "loss": 0.0587, "step": 3706 }, { "epoch": 0.87, "learning_rate": 1.665847001019343e-05, "loss": 0.0425, "step": 3707 }, { "epoch": 0.87, "learning_rate": 1.6656578940001327e-05, "loss": 0.0638, "step": 3708 }, { "epoch": 0.87, "learning_rate": 1.665468744225845e-05, "loss": 0.0745, "step": 3709 }, { "epoch": 0.87, "learning_rate": 1.6652795517086293e-05, "loss": 0.0548, "step": 3710 }, { "epoch": 0.87, "learning_rate": 1.665090316460637e-05, "loss": 0.0565, "step": 3711 }, { "epoch": 0.87, "learning_rate": 1.6649010384940235e-05, "loss": 0.0295, "step": 3712 }, { "epoch": 0.87, "learning_rate": 1.6647117178209452e-05, "loss": 0.0379, "step": 3713 }, { "epoch": 0.87, "learning_rate": 1.6645223544535626e-05, "loss": 0.0202, "step": 3714 }, { "epoch": 0.87, "learning_rate": 1.664332948404038e-05, "loss": 0.1087, "step": 3715 }, { "epoch": 0.87, "learning_rate": 1.6641434996845377e-05, "loss": 0.0864, "step": 3716 }, { "epoch": 0.87, "learning_rate": 1.663954008307229e-05, "loss": 0.0611, "step": 3717 }, { "epoch": 0.87, "learning_rate": 1.6637644742842838e-05, "loss": 0.0482, "step": 3718 }, { "epoch": 0.87, "learning_rate": 1.663574897627875e-05, "loss": 0.0154, "step": 3719 }, { "epoch": 0.87, "learning_rate": 1.66338527835018e-05, "loss": 0.0306, "step": 3720 }, { "epoch": 0.87, "learning_rate": 1.6631956164633768e-05, "loss": 0.0849, "step": 3721 }, { "epoch": 0.87, "learning_rate": 1.6630059119796486e-05, "loss": 0.1051, "step": 3722 }, { "epoch": 0.87, "learning_rate": 1.6628161649111794e-05, "loss": 0.0799, "step": 3723 }, { "epoch": 0.87, "learning_rate": 1.6626263752701567e-05, "loss": 0.0323, "step": 3724 }, { "epoch": 0.87, "learning_rate": 1.662436543068771e-05, "loss": 0.0224, "step": 3725 }, { "epoch": 0.87, "learning_rate": 1.6622466683192146e-05, "loss": 0.0554, "step": 3726 }, { "epoch": 0.87, "learning_rate": 1.662056751033683e-05, "loss": 0.0367, "step": 3727 }, { "epoch": 0.87, "learning_rate": 1.6618667912243754e-05, "loss": 0.0682, "step": 3728 }, { "epoch": 0.88, "learning_rate": 1.6616767889034925e-05, "loss": 0.1431, "step": 3729 }, { "epoch": 0.88, "learning_rate": 1.6614867440832377e-05, "loss": 0.0461, "step": 3730 }, { "epoch": 0.88, "learning_rate": 1.6612966567758184e-05, "loss": 0.0317, "step": 3731 }, { "epoch": 0.88, "learning_rate": 1.661106526993443e-05, "loss": 0.0942, "step": 3732 }, { "epoch": 0.88, "learning_rate": 1.660916354748324e-05, "loss": 0.0868, "step": 3733 }, { "epoch": 0.88, "learning_rate": 1.660726140052676e-05, "loss": 0.0348, "step": 3734 }, { "epoch": 0.88, "learning_rate": 1.6605358829187163e-05, "loss": 0.0727, "step": 3735 }, { "epoch": 0.88, "learning_rate": 1.6603455833586653e-05, "loss": 0.0348, "step": 3736 }, { "epoch": 0.88, "learning_rate": 1.6601552413847456e-05, "loss": 0.0431, "step": 3737 }, { "epoch": 0.88, "learning_rate": 1.6599648570091833e-05, "loss": 0.0229, "step": 3738 }, { "epoch": 0.88, "learning_rate": 1.659774430244206e-05, "loss": 0.0652, "step": 3739 }, { "epoch": 0.88, "learning_rate": 1.6595839611020462e-05, "loss": 0.0285, "step": 3740 }, { "epoch": 0.88, "learning_rate": 1.659393449594936e-05, "loss": 0.0684, "step": 3741 }, { "epoch": 0.88, "learning_rate": 1.659202895735113e-05, "loss": 0.0355, "step": 3742 }, { "epoch": 0.88, "learning_rate": 1.6590122995348158e-05, "loss": 0.0504, "step": 3743 }, { "epoch": 0.88, "learning_rate": 1.658821661006287e-05, "loss": 0.1138, "step": 3744 }, { "epoch": 0.88, "learning_rate": 1.658630980161771e-05, "loss": 0.0539, "step": 3745 }, { "epoch": 0.88, "learning_rate": 1.6584402570135148e-05, "loss": 0.0039, "step": 3746 }, { "epoch": 0.88, "learning_rate": 1.6582494915737693e-05, "loss": 0.0275, "step": 3747 }, { "epoch": 0.88, "learning_rate": 1.6580586838547866e-05, "loss": 0.0138, "step": 3748 }, { "epoch": 0.88, "learning_rate": 1.6578678338688225e-05, "loss": 0.0619, "step": 3749 }, { "epoch": 0.88, "learning_rate": 1.6576769416281354e-05, "loss": 0.057, "step": 3750 }, { "epoch": 0.88, "learning_rate": 1.6574860071449862e-05, "loss": 0.0464, "step": 3751 }, { "epoch": 0.88, "learning_rate": 1.6572950304316385e-05, "loss": 0.0362, "step": 3752 }, { "epoch": 0.88, "learning_rate": 1.6571040115003588e-05, "loss": 0.0655, "step": 3753 }, { "epoch": 0.88, "learning_rate": 1.656912950363416e-05, "loss": 0.0219, "step": 3754 }, { "epoch": 0.88, "learning_rate": 1.656721847033082e-05, "loss": 0.0481, "step": 3755 }, { "epoch": 0.88, "learning_rate": 1.6565307015216318e-05, "loss": 0.0492, "step": 3756 }, { "epoch": 0.88, "learning_rate": 1.6563395138413418e-05, "loss": 0.0787, "step": 3757 }, { "epoch": 0.88, "learning_rate": 1.6561482840044923e-05, "loss": 0.0151, "step": 3758 }, { "epoch": 0.88, "learning_rate": 1.6559570120233665e-05, "loss": 0.0379, "step": 3759 }, { "epoch": 0.88, "learning_rate": 1.655765697910249e-05, "loss": 0.0316, "step": 3760 }, { "epoch": 0.88, "learning_rate": 1.6555743416774284e-05, "loss": 0.078, "step": 3761 }, { "epoch": 0.88, "learning_rate": 1.655382943337195e-05, "loss": 0.117, "step": 3762 }, { "epoch": 0.88, "learning_rate": 1.6551915029018425e-05, "loss": 0.0992, "step": 3763 }, { "epoch": 0.88, "learning_rate": 1.6550000203836672e-05, "loss": 0.0219, "step": 3764 }, { "epoch": 0.88, "learning_rate": 1.654808495794968e-05, "loss": 0.1041, "step": 3765 }, { "epoch": 0.88, "learning_rate": 1.6546169291480466e-05, "loss": 0.0622, "step": 3766 }, { "epoch": 0.88, "learning_rate": 1.6544253204552065e-05, "loss": 0.1693, "step": 3767 }, { "epoch": 0.88, "learning_rate": 1.6542336697287555e-05, "loss": 0.0845, "step": 3768 }, { "epoch": 0.88, "learning_rate": 1.654041976981003e-05, "loss": 0.0785, "step": 3769 }, { "epoch": 0.88, "learning_rate": 1.6538502422242614e-05, "loss": 0.0288, "step": 3770 }, { "epoch": 0.89, "learning_rate": 1.6536584654708458e-05, "loss": 0.0227, "step": 3771 }, { "epoch": 0.89, "learning_rate": 1.6534666467330743e-05, "loss": 0.036, "step": 3772 }, { "epoch": 0.89, "learning_rate": 1.653274786023267e-05, "loss": 0.0596, "step": 3773 }, { "epoch": 0.89, "learning_rate": 1.6530828833537467e-05, "loss": 0.0535, "step": 3774 }, { "epoch": 0.89, "learning_rate": 1.6528909387368402e-05, "loss": 0.0203, "step": 3775 }, { "epoch": 0.89, "learning_rate": 1.6526989521848755e-05, "loss": 0.0479, "step": 3776 }, { "epoch": 0.89, "learning_rate": 1.652506923710184e-05, "loss": 0.0455, "step": 3777 }, { "epoch": 0.89, "learning_rate": 1.6523148533250994e-05, "loss": 0.063, "step": 3778 }, { "epoch": 0.89, "learning_rate": 1.6521227410419583e-05, "loss": 0.0516, "step": 3779 }, { "epoch": 0.89, "learning_rate": 1.6519305868731007e-05, "loss": 0.0462, "step": 3780 }, { "epoch": 0.89, "learning_rate": 1.6517383908308682e-05, "loss": 0.0624, "step": 3781 }, { "epoch": 0.89, "learning_rate": 1.6515461529276053e-05, "loss": 0.0327, "step": 3782 }, { "epoch": 0.89, "learning_rate": 1.65135387317566e-05, "loss": 0.055, "step": 3783 }, { "epoch": 0.89, "learning_rate": 1.6511615515873816e-05, "loss": 0.0237, "step": 3784 }, { "epoch": 0.89, "learning_rate": 1.6509691881751236e-05, "loss": 0.0271, "step": 3785 }, { "epoch": 0.89, "learning_rate": 1.6507767829512407e-05, "loss": 0.0315, "step": 3786 }, { "epoch": 0.89, "learning_rate": 1.650584335928092e-05, "loss": 0.0819, "step": 3787 }, { "epoch": 0.89, "learning_rate": 1.6503918471180375e-05, "loss": 0.0235, "step": 3788 }, { "epoch": 0.89, "learning_rate": 1.6501993165334417e-05, "loss": 0.0661, "step": 3789 }, { "epoch": 0.89, "learning_rate": 1.6500067441866695e-05, "loss": 0.0849, "step": 3790 }, { "epoch": 0.89, "learning_rate": 1.6498141300900905e-05, "loss": 0.0321, "step": 3791 }, { "epoch": 0.89, "learning_rate": 1.6496214742560765e-05, "loss": 0.0101, "step": 3792 }, { "epoch": 0.89, "learning_rate": 1.6494287766970014e-05, "loss": 0.0558, "step": 3793 }, { "epoch": 0.89, "learning_rate": 1.649236037425242e-05, "loss": 0.0737, "step": 3794 }, { "epoch": 0.89, "learning_rate": 1.649043256453178e-05, "loss": 0.0569, "step": 3795 }, { "epoch": 0.89, "learning_rate": 1.648850433793192e-05, "loss": 0.0261, "step": 3796 }, { "epoch": 0.89, "learning_rate": 1.6486575694576687e-05, "loss": 0.0348, "step": 3797 }, { "epoch": 0.89, "learning_rate": 1.648464663458996e-05, "loss": 0.0168, "step": 3798 }, { "epoch": 0.89, "learning_rate": 1.6482717158095637e-05, "loss": 0.0375, "step": 3799 }, { "epoch": 0.89, "learning_rate": 1.648078726521765e-05, "loss": 0.0306, "step": 3800 }, { "epoch": 0.89, "learning_rate": 1.647885695607996e-05, "loss": 0.1026, "step": 3801 }, { "epoch": 0.89, "learning_rate": 1.6476926230806543e-05, "loss": 0.1368, "step": 3802 }, { "epoch": 0.89, "learning_rate": 1.647499508952142e-05, "loss": 0.0159, "step": 3803 }, { "epoch": 0.89, "learning_rate": 1.6473063532348615e-05, "loss": 0.0212, "step": 3804 }, { "epoch": 0.89, "learning_rate": 1.6471131559412194e-05, "loss": 0.0433, "step": 3805 }, { "epoch": 0.89, "learning_rate": 1.6469199170836258e-05, "loss": 0.0809, "step": 3806 }, { "epoch": 0.89, "learning_rate": 1.646726636674491e-05, "loss": 0.027, "step": 3807 }, { "epoch": 0.89, "learning_rate": 1.6465333147262302e-05, "loss": 0.0273, "step": 3808 }, { "epoch": 0.89, "learning_rate": 1.646339951251261e-05, "loss": 0.0466, "step": 3809 }, { "epoch": 0.89, "learning_rate": 1.6461465462620013e-05, "loss": 0.017, "step": 3810 }, { "epoch": 0.89, "learning_rate": 1.645953099770875e-05, "loss": 0.0456, "step": 3811 }, { "epoch": 0.89, "learning_rate": 1.6457596117903065e-05, "loss": 0.1212, "step": 3812 }, { "epoch": 0.89, "learning_rate": 1.645566082332724e-05, "loss": 0.0457, "step": 3813 }, { "epoch": 0.9, "learning_rate": 1.645372511410557e-05, "loss": 0.0144, "step": 3814 }, { "epoch": 0.9, "learning_rate": 1.6451788990362392e-05, "loss": 0.0371, "step": 3815 }, { "epoch": 0.9, "learning_rate": 1.6449852452222063e-05, "loss": 0.0135, "step": 3816 }, { "epoch": 0.9, "learning_rate": 1.644791549980896e-05, "loss": 0.0397, "step": 3817 }, { "epoch": 0.9, "learning_rate": 1.6445978133247503e-05, "loss": 0.0487, "step": 3818 }, { "epoch": 0.9, "learning_rate": 1.644404035266212e-05, "loss": 0.0218, "step": 3819 }, { "epoch": 0.9, "learning_rate": 1.644210215817728e-05, "loss": 0.0894, "step": 3820 }, { "epoch": 0.9, "learning_rate": 1.6440163549917467e-05, "loss": 0.0203, "step": 3821 }, { "epoch": 0.9, "learning_rate": 1.6438224528007204e-05, "loss": 0.0421, "step": 3822 }, { "epoch": 0.9, "learning_rate": 1.6436285092571024e-05, "loss": 0.0701, "step": 3823 }, { "epoch": 0.9, "learning_rate": 1.6434345243733513e-05, "loss": 0.061, "step": 3824 }, { "epoch": 0.9, "learning_rate": 1.6432404981619252e-05, "loss": 0.0348, "step": 3825 }, { "epoch": 0.9, "learning_rate": 1.643046430635287e-05, "loss": 0.0198, "step": 3826 }, { "epoch": 0.9, "learning_rate": 1.6428523218059017e-05, "loss": 0.0405, "step": 3827 }, { "epoch": 0.9, "learning_rate": 1.6426581716862365e-05, "loss": 0.0353, "step": 3828 }, { "epoch": 0.9, "learning_rate": 1.6424639802887615e-05, "loss": 0.0231, "step": 3829 }, { "epoch": 0.9, "learning_rate": 1.6422697476259504e-05, "loss": 0.1586, "step": 3830 }, { "epoch": 0.9, "learning_rate": 1.642075473710278e-05, "loss": 0.0649, "step": 3831 }, { "epoch": 0.9, "learning_rate": 1.6418811585542228e-05, "loss": 0.0285, "step": 3832 }, { "epoch": 0.9, "learning_rate": 1.641686802170266e-05, "loss": 0.0161, "step": 3833 }, { "epoch": 0.9, "learning_rate": 1.64149240457089e-05, "loss": 0.0818, "step": 3834 }, { "epoch": 0.9, "learning_rate": 1.6412979657685816e-05, "loss": 0.0562, "step": 3835 }, { "epoch": 0.9, "learning_rate": 1.6411034857758298e-05, "loss": 0.0349, "step": 3836 }, { "epoch": 0.9, "learning_rate": 1.640908964605125e-05, "loss": 0.062, "step": 3837 }, { "epoch": 0.9, "learning_rate": 1.640714402268963e-05, "loss": 0.0382, "step": 3838 }, { "epoch": 0.9, "learning_rate": 1.6405197987798387e-05, "loss": 0.0606, "step": 3839 }, { "epoch": 0.9, "learning_rate": 1.6403251541502524e-05, "loss": 0.025, "step": 3840 }, { "epoch": 0.9, "learning_rate": 1.6401304683927056e-05, "loss": 0.0391, "step": 3841 }, { "epoch": 0.9, "learning_rate": 1.6399357415197038e-05, "loss": 0.0369, "step": 3842 }, { "epoch": 0.9, "learning_rate": 1.6397409735437533e-05, "loss": 0.1067, "step": 3843 }, { "epoch": 0.9, "learning_rate": 1.639546164477365e-05, "loss": 0.1149, "step": 3844 }, { "epoch": 0.9, "learning_rate": 1.63935131433305e-05, "loss": 0.0835, "step": 3845 }, { "epoch": 0.9, "learning_rate": 1.6391564231233248e-05, "loss": 0.0294, "step": 3846 }, { "epoch": 0.9, "learning_rate": 1.6389614908607067e-05, "loss": 0.0141, "step": 3847 }, { "epoch": 0.9, "learning_rate": 1.638766517557716e-05, "loss": 0.0873, "step": 3848 }, { "epoch": 0.9, "learning_rate": 1.6385715032268766e-05, "loss": 0.0268, "step": 3849 }, { "epoch": 0.9, "learning_rate": 1.638376447880713e-05, "loss": 0.0801, "step": 3850 }, { "epoch": 0.9, "learning_rate": 1.6381813515317547e-05, "loss": 0.0602, "step": 3851 }, { "epoch": 0.9, "learning_rate": 1.637986214192532e-05, "loss": 0.0088, "step": 3852 }, { "epoch": 0.9, "learning_rate": 1.6377910358755785e-05, "loss": 0.0595, "step": 3853 }, { "epoch": 0.9, "learning_rate": 1.6375958165934312e-05, "loss": 0.0527, "step": 3854 }, { "epoch": 0.9, "learning_rate": 1.637400556358628e-05, "loss": 0.0336, "step": 3855 }, { "epoch": 0.9, "learning_rate": 1.6372052551837114e-05, "loss": 0.1181, "step": 3856 }, { "epoch": 0.91, "learning_rate": 1.637009913081225e-05, "loss": 0.0805, "step": 3857 }, { "epoch": 0.91, "learning_rate": 1.6368145300637156e-05, "loss": 0.12, "step": 3858 }, { "epoch": 0.91, "learning_rate": 1.6366191061437327e-05, "loss": 0.0359, "step": 3859 }, { "epoch": 0.91, "learning_rate": 1.6364236413338285e-05, "loss": 0.049, "step": 3860 }, { "epoch": 0.91, "learning_rate": 1.636228135646557e-05, "loss": 0.0537, "step": 3861 }, { "epoch": 0.91, "learning_rate": 1.6360325890944767e-05, "loss": 0.0151, "step": 3862 }, { "epoch": 0.91, "learning_rate": 1.6358370016901462e-05, "loss": 0.0588, "step": 3863 }, { "epoch": 0.91, "learning_rate": 1.6356413734461293e-05, "loss": 0.079, "step": 3864 }, { "epoch": 0.91, "learning_rate": 1.6354457043749902e-05, "loss": 0.0294, "step": 3865 }, { "epoch": 0.91, "learning_rate": 1.6352499944892968e-05, "loss": 0.1301, "step": 3866 }, { "epoch": 0.91, "learning_rate": 1.6350542438016197e-05, "loss": 0.0388, "step": 3867 }, { "epoch": 0.91, "learning_rate": 1.634858452324532e-05, "loss": 0.0684, "step": 3868 }, { "epoch": 0.91, "learning_rate": 1.6346626200706095e-05, "loss": 0.0377, "step": 3869 }, { "epoch": 0.91, "learning_rate": 1.63446674705243e-05, "loss": 0.0386, "step": 3870 }, { "epoch": 0.91, "learning_rate": 1.634270833282575e-05, "loss": 0.0862, "step": 3871 }, { "epoch": 0.91, "learning_rate": 1.6340748787736273e-05, "loss": 0.0621, "step": 3872 }, { "epoch": 0.91, "learning_rate": 1.6338788835381735e-05, "loss": 0.0295, "step": 3873 }, { "epoch": 0.91, "learning_rate": 1.6336828475888022e-05, "loss": 0.0462, "step": 3874 }, { "epoch": 0.91, "learning_rate": 1.633486770938105e-05, "loss": 0.039, "step": 3875 }, { "epoch": 0.91, "learning_rate": 1.633290653598675e-05, "loss": 0.0696, "step": 3876 }, { "epoch": 0.91, "learning_rate": 1.63309449558311e-05, "loss": 0.0626, "step": 3877 }, { "epoch": 0.91, "learning_rate": 1.6328982969040084e-05, "loss": 0.0678, "step": 3878 }, { "epoch": 0.91, "learning_rate": 1.6327020575739722e-05, "loss": 0.036, "step": 3879 }, { "epoch": 0.91, "learning_rate": 1.6325057776056058e-05, "loss": 0.0553, "step": 3880 }, { "epoch": 0.91, "learning_rate": 1.6323094570115163e-05, "loss": 0.0283, "step": 3881 }, { "epoch": 0.91, "learning_rate": 1.632113095804313e-05, "loss": 0.0092, "step": 3882 }, { "epoch": 0.91, "learning_rate": 1.6319166939966083e-05, "loss": 0.0503, "step": 3883 }, { "epoch": 0.91, "learning_rate": 1.6317202516010177e-05, "loss": 0.0387, "step": 3884 }, { "epoch": 0.91, "learning_rate": 1.6315237686301575e-05, "loss": 0.0535, "step": 3885 }, { "epoch": 0.91, "learning_rate": 1.631327245096649e-05, "loss": 0.0866, "step": 3886 }, { "epoch": 0.91, "learning_rate": 1.6311306810131138e-05, "loss": 0.0144, "step": 3887 }, { "epoch": 0.91, "learning_rate": 1.6309340763921776e-05, "loss": 0.0321, "step": 3888 }, { "epoch": 0.91, "learning_rate": 1.630737431246469e-05, "loss": 0.0334, "step": 3889 }, { "epoch": 0.91, "learning_rate": 1.6305407455886167e-05, "loss": 0.0342, "step": 3890 }, { "epoch": 0.91, "learning_rate": 1.6303440194312553e-05, "loss": 0.0793, "step": 3891 }, { "epoch": 0.91, "learning_rate": 1.6301472527870204e-05, "loss": 0.0457, "step": 3892 }, { "epoch": 0.91, "learning_rate": 1.6299504456685494e-05, "loss": 0.0842, "step": 3893 }, { "epoch": 0.91, "learning_rate": 1.629753598088484e-05, "loss": 0.0465, "step": 3894 }, { "epoch": 0.91, "learning_rate": 1.6295567100594673e-05, "loss": 0.0427, "step": 3895 }, { "epoch": 0.91, "learning_rate": 1.6293597815941455e-05, "loss": 0.0549, "step": 3896 }, { "epoch": 0.91, "learning_rate": 1.6291628127051674e-05, "loss": 0.0529, "step": 3897 }, { "epoch": 0.91, "learning_rate": 1.628965803405184e-05, "loss": 0.0189, "step": 3898 }, { "epoch": 0.92, "learning_rate": 1.6287687537068492e-05, "loss": 0.0521, "step": 3899 }, { "epoch": 0.92, "learning_rate": 1.6285716636228194e-05, "loss": 0.0931, "step": 3900 }, { "epoch": 0.92, "learning_rate": 1.628374533165754e-05, "loss": 0.0295, "step": 3901 }, { "epoch": 0.92, "learning_rate": 1.6281773623483144e-05, "loss": 0.0394, "step": 3902 }, { "epoch": 0.92, "learning_rate": 1.627980151183165e-05, "loss": 0.0148, "step": 3903 }, { "epoch": 0.92, "learning_rate": 1.6277828996829726e-05, "loss": 0.0743, "step": 3904 }, { "epoch": 0.92, "learning_rate": 1.6275856078604065e-05, "loss": 0.0231, "step": 3905 }, { "epoch": 0.92, "learning_rate": 1.6273882757281385e-05, "loss": 0.0504, "step": 3906 }, { "epoch": 0.92, "learning_rate": 1.6271909032988437e-05, "loss": 0.0506, "step": 3907 }, { "epoch": 0.92, "learning_rate": 1.626993490585199e-05, "loss": 0.0446, "step": 3908 }, { "epoch": 0.92, "learning_rate": 1.6267960375998845e-05, "loss": 0.0319, "step": 3909 }, { "epoch": 0.92, "learning_rate": 1.6265985443555822e-05, "loss": 0.0604, "step": 3910 }, { "epoch": 0.92, "learning_rate": 1.626401010864977e-05, "loss": 0.0283, "step": 3911 }, { "epoch": 0.92, "learning_rate": 1.6262034371407567e-05, "loss": 0.0383, "step": 3912 }, { "epoch": 0.92, "learning_rate": 1.6260058231956112e-05, "loss": 0.108, "step": 3913 }, { "epoch": 0.92, "learning_rate": 1.6258081690422336e-05, "loss": 0.0202, "step": 3914 }, { "epoch": 0.92, "learning_rate": 1.625610474693319e-05, "loss": 0.0829, "step": 3915 }, { "epoch": 0.92, "learning_rate": 1.625412740161565e-05, "loss": 0.0607, "step": 3916 }, { "epoch": 0.92, "learning_rate": 1.6252149654596722e-05, "loss": 0.0541, "step": 3917 }, { "epoch": 0.92, "learning_rate": 1.6250171506003435e-05, "loss": 0.0125, "step": 3918 }, { "epoch": 0.92, "learning_rate": 1.624819295596285e-05, "loss": 0.0565, "step": 3919 }, { "epoch": 0.92, "learning_rate": 1.6246214004602044e-05, "loss": 0.0236, "step": 3920 }, { "epoch": 0.92, "learning_rate": 1.6244234652048127e-05, "loss": 0.0258, "step": 3921 }, { "epoch": 0.92, "learning_rate": 1.6242254898428233e-05, "loss": 0.0286, "step": 3922 }, { "epoch": 0.92, "learning_rate": 1.6240274743869518e-05, "loss": 0.0742, "step": 3923 }, { "epoch": 0.92, "learning_rate": 1.6238294188499173e-05, "loss": 0.0447, "step": 3924 }, { "epoch": 0.92, "learning_rate": 1.6236313232444402e-05, "loss": 0.0491, "step": 3925 }, { "epoch": 0.92, "learning_rate": 1.6234331875832443e-05, "loss": 0.0447, "step": 3926 }, { "epoch": 0.92, "learning_rate": 1.623235011879056e-05, "loss": 0.0226, "step": 3927 }, { "epoch": 0.92, "learning_rate": 1.623036796144604e-05, "loss": 0.0418, "step": 3928 }, { "epoch": 0.92, "learning_rate": 1.62283854039262e-05, "loss": 0.0265, "step": 3929 }, { "epoch": 0.92, "learning_rate": 1.622640244635837e-05, "loss": 0.0608, "step": 3930 }, { "epoch": 0.92, "learning_rate": 1.6224419088869926e-05, "loss": 0.0854, "step": 3931 }, { "epoch": 0.92, "learning_rate": 1.622243533158825e-05, "loss": 0.0435, "step": 3932 }, { "epoch": 0.92, "learning_rate": 1.6220451174640763e-05, "loss": 0.0085, "step": 3933 }, { "epoch": 0.92, "learning_rate": 1.6218466618154905e-05, "loss": 0.0327, "step": 3934 }, { "epoch": 0.92, "learning_rate": 1.621648166225815e-05, "loss": 0.0385, "step": 3935 }, { "epoch": 0.92, "learning_rate": 1.621449630707798e-05, "loss": 0.0269, "step": 3936 }, { "epoch": 0.92, "learning_rate": 1.6212510552741922e-05, "loss": 0.138, "step": 3937 }, { "epoch": 0.92, "learning_rate": 1.6210524399377518e-05, "loss": 0.0092, "step": 3938 }, { "epoch": 0.92, "learning_rate": 1.620853784711234e-05, "loss": 0.0064, "step": 3939 }, { "epoch": 0.92, "learning_rate": 1.620655089607398e-05, "loss": 0.0635, "step": 3940 }, { "epoch": 0.92, "learning_rate": 1.6204563546390068e-05, "loss": 0.1023, "step": 3941 }, { "epoch": 0.93, "learning_rate": 1.620257579818824e-05, "loss": 0.0517, "step": 3942 }, { "epoch": 0.93, "learning_rate": 1.6200587651596178e-05, "loss": 0.0359, "step": 3943 }, { "epoch": 0.93, "learning_rate": 1.6198599106741572e-05, "loss": 0.0534, "step": 3944 }, { "epoch": 0.93, "learning_rate": 1.6196610163752154e-05, "loss": 0.049, "step": 3945 }, { "epoch": 0.93, "learning_rate": 1.6194620822755665e-05, "loss": 0.028, "step": 3946 }, { "epoch": 0.93, "learning_rate": 1.6192631083879887e-05, "loss": 0.0599, "step": 3947 }, { "epoch": 0.93, "learning_rate": 1.6190640947252618e-05, "loss": 0.0827, "step": 3948 }, { "epoch": 0.93, "learning_rate": 1.6188650413001684e-05, "loss": 0.0629, "step": 3949 }, { "epoch": 0.93, "learning_rate": 1.618665948125494e-05, "loss": 0.018, "step": 3950 }, { "epoch": 0.93, "learning_rate": 1.6184668152140253e-05, "loss": 0.0385, "step": 3951 }, { "epoch": 0.93, "learning_rate": 1.6182676425785534e-05, "loss": 0.0291, "step": 3952 }, { "epoch": 0.93, "learning_rate": 1.6180684302318712e-05, "loss": 0.0394, "step": 3953 }, { "epoch": 0.93, "learning_rate": 1.6178691781867738e-05, "loss": 0.0317, "step": 3954 }, { "epoch": 0.93, "learning_rate": 1.6176698864560595e-05, "loss": 0.1197, "step": 3955 }, { "epoch": 0.93, "learning_rate": 1.617470555052528e-05, "loss": 0.067, "step": 3956 }, { "epoch": 0.93, "learning_rate": 1.617271183988983e-05, "loss": 0.0242, "step": 3957 }, { "epoch": 0.93, "learning_rate": 1.6170717732782295e-05, "loss": 0.0799, "step": 3958 }, { "epoch": 0.93, "learning_rate": 1.616872322933076e-05, "loss": 0.088, "step": 3959 }, { "epoch": 0.93, "learning_rate": 1.616672832966333e-05, "loss": 0.0501, "step": 3960 }, { "epoch": 0.93, "learning_rate": 1.6164733033908143e-05, "loss": 0.0672, "step": 3961 }, { "epoch": 0.93, "learning_rate": 1.616273734219335e-05, "loss": 0.133, "step": 3962 }, { "epoch": 0.93, "learning_rate": 1.616074125464713e-05, "loss": 0.0981, "step": 3963 }, { "epoch": 0.93, "learning_rate": 1.61587447713977e-05, "loss": 0.0679, "step": 3964 }, { "epoch": 0.93, "learning_rate": 1.6156747892573293e-05, "loss": 0.0501, "step": 3965 }, { "epoch": 0.93, "learning_rate": 1.6154750618302162e-05, "loss": 0.0376, "step": 3966 }, { "epoch": 0.93, "learning_rate": 1.6152752948712596e-05, "loss": 0.043, "step": 3967 }, { "epoch": 0.93, "learning_rate": 1.6150754883932903e-05, "loss": 0.0263, "step": 3968 }, { "epoch": 0.93, "learning_rate": 1.614875642409142e-05, "loss": 0.0454, "step": 3969 }, { "epoch": 0.93, "learning_rate": 1.6146757569316504e-05, "loss": 0.0591, "step": 3970 }, { "epoch": 0.93, "learning_rate": 1.6144758319736547e-05, "loss": 0.0512, "step": 3971 }, { "epoch": 0.93, "learning_rate": 1.614275867547996e-05, "loss": 0.0143, "step": 3972 }, { "epoch": 0.93, "learning_rate": 1.6140758636675172e-05, "loss": 0.0491, "step": 3973 }, { "epoch": 0.93, "learning_rate": 1.613875820345065e-05, "loss": 0.0152, "step": 3974 }, { "epoch": 0.93, "learning_rate": 1.6136757375934882e-05, "loss": 0.0142, "step": 3975 }, { "epoch": 0.93, "learning_rate": 1.6134756154256385e-05, "loss": 0.0335, "step": 3976 }, { "epoch": 0.93, "learning_rate": 1.613275453854369e-05, "loss": 0.0291, "step": 3977 }, { "epoch": 0.93, "learning_rate": 1.6130752528925364e-05, "loss": 0.0566, "step": 3978 }, { "epoch": 0.93, "learning_rate": 1.6128750125529992e-05, "loss": 0.0065, "step": 3979 }, { "epoch": 0.93, "learning_rate": 1.6126747328486188e-05, "loss": 0.0648, "step": 3980 }, { "epoch": 0.93, "learning_rate": 1.61247441379226e-05, "loss": 0.0311, "step": 3981 }, { "epoch": 0.93, "learning_rate": 1.6122740553967882e-05, "loss": 0.0853, "step": 3982 }, { "epoch": 0.93, "learning_rate": 1.6120736576750732e-05, "loss": 0.0357, "step": 3983 }, { "epoch": 0.94, "learning_rate": 1.6118732206399855e-05, "loss": 0.0208, "step": 3984 }, { "epoch": 0.94, "learning_rate": 1.6116727443044003e-05, "loss": 0.0859, "step": 3985 }, { "epoch": 0.94, "learning_rate": 1.6114722286811936e-05, "loss": 0.1137, "step": 3986 }, { "epoch": 0.94, "learning_rate": 1.6112716737832443e-05, "loss": 0.002, "step": 3987 }, { "epoch": 0.94, "learning_rate": 1.611071079623434e-05, "loss": 0.0372, "step": 3988 }, { "epoch": 0.94, "learning_rate": 1.6108704462146473e-05, "loss": 0.0593, "step": 3989 }, { "epoch": 0.94, "learning_rate": 1.6106697735697708e-05, "loss": 0.0213, "step": 3990 }, { "epoch": 0.94, "learning_rate": 1.610469061701693e-05, "loss": 0.0653, "step": 3991 }, { "epoch": 0.94, "learning_rate": 1.6102683106233064e-05, "loss": 0.0463, "step": 3992 }, { "epoch": 0.94, "learning_rate": 1.6100675203475046e-05, "loss": 0.1299, "step": 3993 }, { "epoch": 0.94, "learning_rate": 1.6098666908871844e-05, "loss": 0.0275, "step": 3994 }, { "epoch": 0.94, "learning_rate": 1.609665822255245e-05, "loss": 0.0127, "step": 3995 }, { "epoch": 0.94, "learning_rate": 1.6094649144645886e-05, "loss": 0.0214, "step": 3996 }, { "epoch": 0.94, "learning_rate": 1.6092639675281194e-05, "loss": 0.1773, "step": 3997 }, { "epoch": 0.94, "learning_rate": 1.6090629814587436e-05, "loss": 0.085, "step": 3998 }, { "epoch": 0.94, "learning_rate": 1.608861956269371e-05, "loss": 0.0134, "step": 3999 }, { "epoch": 0.94, "learning_rate": 1.6086608919729135e-05, "loss": 0.0558, "step": 4000 }, { "epoch": 0.94, "learning_rate": 1.608459788582285e-05, "loss": 0.0095, "step": 4001 }, { "epoch": 0.94, "learning_rate": 1.6082586461104024e-05, "loss": 0.0091, "step": 4002 }, { "epoch": 0.94, "learning_rate": 1.608057464570185e-05, "loss": 0.0346, "step": 4003 }, { "epoch": 0.94, "learning_rate": 1.6078562439745552e-05, "loss": 0.1178, "step": 4004 }, { "epoch": 0.94, "learning_rate": 1.607654984336437e-05, "loss": 0.0836, "step": 4005 }, { "epoch": 0.94, "learning_rate": 1.6074536856687576e-05, "loss": 0.009, "step": 4006 }, { "epoch": 0.94, "learning_rate": 1.6072523479844455e-05, "loss": 0.015, "step": 4007 }, { "epoch": 0.94, "learning_rate": 1.607050971296433e-05, "loss": 0.1194, "step": 4008 }, { "epoch": 0.94, "learning_rate": 1.6068495556176548e-05, "loss": 0.0056, "step": 4009 }, { "epoch": 0.94, "learning_rate": 1.6066481009610474e-05, "loss": 0.033, "step": 4010 }, { "epoch": 0.94, "learning_rate": 1.606446607339551e-05, "loss": 0.0685, "step": 4011 }, { "epoch": 0.94, "learning_rate": 1.6062450747661062e-05, "loss": 0.0396, "step": 4012 }, { "epoch": 0.94, "learning_rate": 1.606043503253658e-05, "loss": 0.0597, "step": 4013 }, { "epoch": 0.94, "learning_rate": 1.605841892815154e-05, "loss": 0.0103, "step": 4014 }, { "epoch": 0.94, "learning_rate": 1.6056402434635424e-05, "loss": 0.0446, "step": 4015 }, { "epoch": 0.94, "learning_rate": 1.605438555211776e-05, "loss": 0.1054, "step": 4016 }, { "epoch": 0.94, "learning_rate": 1.605236828072809e-05, "loss": 0.0255, "step": 4017 }, { "epoch": 0.94, "learning_rate": 1.6050350620595976e-05, "loss": 0.0939, "step": 4018 }, { "epoch": 0.94, "learning_rate": 1.6048332571851022e-05, "loss": 0.035, "step": 4019 }, { "epoch": 0.94, "learning_rate": 1.604631413462284e-05, "loss": 0.0098, "step": 4020 }, { "epoch": 0.94, "learning_rate": 1.6044295309041077e-05, "loss": 0.0322, "step": 4021 }, { "epoch": 0.94, "learning_rate": 1.6042276095235402e-05, "loss": 0.0941, "step": 4022 }, { "epoch": 0.94, "learning_rate": 1.6040256493335508e-05, "loss": 0.0464, "step": 4023 }, { "epoch": 0.94, "learning_rate": 1.6038236503471112e-05, "loss": 0.0722, "step": 4024 }, { "epoch": 0.94, "learning_rate": 1.6036216125771956e-05, "loss": 0.06, "step": 4025 }, { "epoch": 0.94, "learning_rate": 1.6034195360367815e-05, "loss": 0.0558, "step": 4026 }, { "epoch": 0.95, "learning_rate": 1.603217420738848e-05, "loss": 0.0527, "step": 4027 }, { "epoch": 0.95, "learning_rate": 1.6030152666963768e-05, "loss": 0.0967, "step": 4028 }, { "epoch": 0.95, "learning_rate": 1.6028130739223517e-05, "loss": 0.0521, "step": 4029 }, { "epoch": 0.95, "learning_rate": 1.6026108424297605e-05, "loss": 0.025, "step": 4030 }, { "epoch": 0.95, "learning_rate": 1.6024085722315918e-05, "loss": 0.0131, "step": 4031 }, { "epoch": 0.95, "learning_rate": 1.6022062633408378e-05, "loss": 0.0614, "step": 4032 }, { "epoch": 0.95, "learning_rate": 1.6020039157704923e-05, "loss": 0.0598, "step": 4033 }, { "epoch": 0.95, "learning_rate": 1.6018015295335522e-05, "loss": 0.0481, "step": 4034 }, { "epoch": 0.95, "learning_rate": 1.601599104643017e-05, "loss": 0.0566, "step": 4035 }, { "epoch": 0.95, "learning_rate": 1.6013966411118882e-05, "loss": 0.0287, "step": 4036 }, { "epoch": 0.95, "learning_rate": 1.60119413895317e-05, "loss": 0.0303, "step": 4037 }, { "epoch": 0.95, "learning_rate": 1.6009915981798693e-05, "loss": 0.0687, "step": 4038 }, { "epoch": 0.95, "learning_rate": 1.600789018804995e-05, "loss": 0.0475, "step": 4039 }, { "epoch": 0.95, "learning_rate": 1.6005864008415584e-05, "loss": 0.0453, "step": 4040 }, { "epoch": 0.95, "learning_rate": 1.6003837443025742e-05, "loss": 0.0867, "step": 4041 }, { "epoch": 0.95, "learning_rate": 1.600181049201059e-05, "loss": 0.0838, "step": 4042 }, { "epoch": 0.95, "learning_rate": 1.5999783155500317e-05, "loss": 0.0129, "step": 4043 }, { "epoch": 0.95, "learning_rate": 1.5997755433625136e-05, "loss": 0.0795, "step": 4044 }, { "epoch": 0.95, "learning_rate": 1.5995727326515288e-05, "loss": 0.0377, "step": 4045 }, { "epoch": 0.95, "learning_rate": 1.599369883430104e-05, "loss": 0.052, "step": 4046 }, { "epoch": 0.95, "learning_rate": 1.599166995711268e-05, "loss": 0.092, "step": 4047 }, { "epoch": 0.95, "learning_rate": 1.5989640695080525e-05, "loss": 0.0358, "step": 4048 }, { "epoch": 0.95, "learning_rate": 1.598761104833491e-05, "loss": 0.0308, "step": 4049 }, { "epoch": 0.95, "learning_rate": 1.5985581017006202e-05, "loss": 0.0691, "step": 4050 }, { "epoch": 0.95, "learning_rate": 1.598355060122479e-05, "loss": 0.0234, "step": 4051 }, { "epoch": 0.95, "learning_rate": 1.5981519801121086e-05, "loss": 0.0466, "step": 4052 }, { "epoch": 0.95, "learning_rate": 1.5979488616825522e-05, "loss": 0.033, "step": 4053 }, { "epoch": 0.95, "learning_rate": 1.5977457048468574e-05, "loss": 0.047, "step": 4054 }, { "epoch": 0.95, "learning_rate": 1.5975425096180718e-05, "loss": 0.0089, "step": 4055 }, { "epoch": 0.95, "learning_rate": 1.597339276009247e-05, "loss": 0.0195, "step": 4056 }, { "epoch": 0.95, "learning_rate": 1.5971360040334362e-05, "loss": 0.0554, "step": 4057 }, { "epoch": 0.95, "learning_rate": 1.596932693703696e-05, "loss": 0.1128, "step": 4058 }, { "epoch": 0.95, "learning_rate": 1.5967293450330853e-05, "loss": 0.0754, "step": 4059 }, { "epoch": 0.95, "learning_rate": 1.596525958034665e-05, "loss": 0.0563, "step": 4060 }, { "epoch": 0.95, "learning_rate": 1.5963225327214975e-05, "loss": 0.0309, "step": 4061 }, { "epoch": 0.95, "learning_rate": 1.5961190691066502e-05, "loss": 0.0075, "step": 4062 }, { "epoch": 0.95, "learning_rate": 1.5959155672031907e-05, "loss": 0.0172, "step": 4063 }, { "epoch": 0.95, "learning_rate": 1.5957120270241903e-05, "loss": 0.058, "step": 4064 }, { "epoch": 0.95, "learning_rate": 1.5955084485827218e-05, "loss": 0.0258, "step": 4065 }, { "epoch": 0.95, "learning_rate": 1.5953048318918617e-05, "loss": 0.0966, "step": 4066 }, { "epoch": 0.95, "learning_rate": 1.595101176964688e-05, "loss": 0.0191, "step": 4067 }, { "epoch": 0.95, "learning_rate": 1.5948974838142813e-05, "loss": 0.0712, "step": 4068 }, { "epoch": 0.95, "learning_rate": 1.594693752453725e-05, "loss": 0.0423, "step": 4069 }, { "epoch": 0.96, "learning_rate": 1.5944899828961043e-05, "loss": 0.0783, "step": 4070 }, { "epoch": 0.96, "learning_rate": 1.5942861751545076e-05, "loss": 0.02, "step": 4071 }, { "epoch": 0.96, "learning_rate": 1.594082329242025e-05, "loss": 0.0363, "step": 4072 }, { "epoch": 0.96, "learning_rate": 1.5938784451717504e-05, "loss": 0.0626, "step": 4073 }, { "epoch": 0.96, "learning_rate": 1.5936745229567785e-05, "loss": 0.0503, "step": 4074 }, { "epoch": 0.96, "learning_rate": 1.5934705626102074e-05, "loss": 0.0224, "step": 4075 }, { "epoch": 0.96, "learning_rate": 1.5932665641451375e-05, "loss": 0.0687, "step": 4076 }, { "epoch": 0.96, "learning_rate": 1.5930625275746715e-05, "loss": 0.0552, "step": 4077 }, { "epoch": 0.96, "learning_rate": 1.5928584529119146e-05, "loss": 0.0358, "step": 4078 }, { "epoch": 0.96, "learning_rate": 1.5926543401699742e-05, "loss": 0.092, "step": 4079 }, { "epoch": 0.96, "learning_rate": 1.5924501893619606e-05, "loss": 0.0438, "step": 4080 }, { "epoch": 0.96, "learning_rate": 1.5922460005009867e-05, "loss": 0.0207, "step": 4081 }, { "epoch": 0.96, "learning_rate": 1.5920417736001674e-05, "loss": 0.0367, "step": 4082 }, { "epoch": 0.96, "learning_rate": 1.59183750867262e-05, "loss": 0.172, "step": 4083 }, { "epoch": 0.96, "learning_rate": 1.5916332057314643e-05, "loss": 0.0591, "step": 4084 }, { "epoch": 0.96, "learning_rate": 1.591428864789823e-05, "loss": 0.0562, "step": 4085 }, { "epoch": 0.96, "learning_rate": 1.5912244858608205e-05, "loss": 0.0732, "step": 4086 }, { "epoch": 0.96, "learning_rate": 1.5910200689575843e-05, "loss": 0.0669, "step": 4087 }, { "epoch": 0.96, "learning_rate": 1.590815614093244e-05, "loss": 0.0377, "step": 4088 }, { "epoch": 0.96, "learning_rate": 1.5906111212809313e-05, "loss": 0.0258, "step": 4089 }, { "epoch": 0.96, "learning_rate": 1.5904065905337813e-05, "loss": 0.043, "step": 4090 }, { "epoch": 0.96, "learning_rate": 1.5902020218649306e-05, "loss": 0.0448, "step": 4091 }, { "epoch": 0.96, "learning_rate": 1.589997415287519e-05, "loss": 0.0613, "step": 4092 }, { "epoch": 0.96, "learning_rate": 1.589792770814688e-05, "loss": 0.0296, "step": 4093 }, { "epoch": 0.96, "learning_rate": 1.5895880884595816e-05, "loss": 0.077, "step": 4094 }, { "epoch": 0.96, "learning_rate": 1.5893833682353473e-05, "loss": 0.0371, "step": 4095 }, { "epoch": 0.96, "learning_rate": 1.5891786101551337e-05, "loss": 0.0783, "step": 4096 }, { "epoch": 0.96, "learning_rate": 1.588973814232092e-05, "loss": 0.0238, "step": 4097 }, { "epoch": 0.96, "learning_rate": 1.5887689804793773e-05, "loss": 0.0296, "step": 4098 }, { "epoch": 0.96, "learning_rate": 1.588564108910145e-05, "loss": 0.0559, "step": 4099 }, { "epoch": 0.96, "learning_rate": 1.5883591995375545e-05, "loss": 0.0404, "step": 4100 }, { "epoch": 0.96, "learning_rate": 1.5881542523747673e-05, "loss": 0.0379, "step": 4101 }, { "epoch": 0.96, "learning_rate": 1.5879492674349463e-05, "loss": 0.0112, "step": 4102 }, { "epoch": 0.96, "learning_rate": 1.5877442447312583e-05, "loss": 0.0277, "step": 4103 }, { "epoch": 0.96, "learning_rate": 1.587539184276872e-05, "loss": 0.0777, "step": 4104 }, { "epoch": 0.96, "learning_rate": 1.5873340860849578e-05, "loss": 0.0498, "step": 4105 }, { "epoch": 0.96, "learning_rate": 1.587128950168689e-05, "loss": 0.0052, "step": 4106 }, { "epoch": 0.96, "learning_rate": 1.586923776541242e-05, "loss": 0.0566, "step": 4107 }, { "epoch": 0.96, "learning_rate": 1.5867185652157956e-05, "loss": 0.035, "step": 4108 }, { "epoch": 0.96, "learning_rate": 1.586513316205529e-05, "loss": 0.0941, "step": 4109 }, { "epoch": 0.96, "learning_rate": 1.5863080295236267e-05, "loss": 0.0237, "step": 4110 }, { "epoch": 0.96, "learning_rate": 1.586102705183273e-05, "loss": 0.0135, "step": 4111 }, { "epoch": 0.97, "learning_rate": 1.585897343197657e-05, "loss": 0.0195, "step": 4112 }, { "epoch": 0.97, "learning_rate": 1.5856919435799684e-05, "loss": 0.0479, "step": 4113 }, { "epoch": 0.97, "learning_rate": 1.5854865063433996e-05, "loss": 0.0564, "step": 4114 }, { "epoch": 0.97, "learning_rate": 1.5852810315011464e-05, "loss": 0.0293, "step": 4115 }, { "epoch": 0.97, "learning_rate": 1.5850755190664068e-05, "loss": 0.0111, "step": 4116 }, { "epoch": 0.97, "learning_rate": 1.58486996905238e-05, "loss": 0.164, "step": 4117 }, { "epoch": 0.97, "learning_rate": 1.5846643814722685e-05, "loss": 0.0362, "step": 4118 }, { "epoch": 0.97, "learning_rate": 1.5844587563392782e-05, "loss": 0.0607, "step": 4119 }, { "epoch": 0.97, "learning_rate": 1.5842530936666148e-05, "loss": 0.1126, "step": 4120 }, { "epoch": 0.97, "learning_rate": 1.5840473934674892e-05, "loss": 0.0063, "step": 4121 }, { "epoch": 0.97, "learning_rate": 1.5838416557551128e-05, "loss": 0.0904, "step": 4122 }, { "epoch": 0.97, "learning_rate": 1.5836358805427002e-05, "loss": 0.0362, "step": 4123 }, { "epoch": 0.97, "learning_rate": 1.5834300678434687e-05, "loss": 0.0212, "step": 4124 }, { "epoch": 0.97, "learning_rate": 1.5832242176706372e-05, "loss": 0.0461, "step": 4125 }, { "epoch": 0.97, "learning_rate": 1.5830183300374275e-05, "loss": 0.0856, "step": 4126 }, { "epoch": 0.97, "learning_rate": 1.5828124049570636e-05, "loss": 0.009, "step": 4127 }, { "epoch": 0.97, "learning_rate": 1.5826064424427724e-05, "loss": 0.0849, "step": 4128 }, { "epoch": 0.97, "learning_rate": 1.5824004425077827e-05, "loss": 0.0391, "step": 4129 }, { "epoch": 0.97, "learning_rate": 1.582194405165326e-05, "loss": 0.0221, "step": 4130 }, { "epoch": 0.97, "learning_rate": 1.5819883304286355e-05, "loss": 0.0428, "step": 4131 }, { "epoch": 0.97, "learning_rate": 1.5817822183109475e-05, "loss": 0.0636, "step": 4132 }, { "epoch": 0.97, "learning_rate": 1.5815760688255008e-05, "loss": 0.0434, "step": 4133 }, { "epoch": 0.97, "learning_rate": 1.5813698819855362e-05, "loss": 0.0415, "step": 4134 }, { "epoch": 0.97, "learning_rate": 1.5811636578042972e-05, "loss": 0.0181, "step": 4135 }, { "epoch": 0.97, "learning_rate": 1.5809573962950295e-05, "loss": 0.0751, "step": 4136 }, { "epoch": 0.97, "learning_rate": 1.580751097470981e-05, "loss": 0.0384, "step": 4137 }, { "epoch": 0.97, "learning_rate": 1.5805447613454025e-05, "loss": 0.0598, "step": 4138 }, { "epoch": 0.97, "learning_rate": 1.5803383879315465e-05, "loss": 0.0749, "step": 4139 }, { "epoch": 0.97, "learning_rate": 1.5801319772426688e-05, "loss": 0.0083, "step": 4140 }, { "epoch": 0.97, "learning_rate": 1.579925529292027e-05, "loss": 0.0922, "step": 4141 }, { "epoch": 0.97, "learning_rate": 1.579719044092881e-05, "loss": 0.0114, "step": 4142 }, { "epoch": 0.97, "learning_rate": 1.5795125216584938e-05, "loss": 0.061, "step": 4143 }, { "epoch": 0.97, "learning_rate": 1.5793059620021295e-05, "loss": 0.025, "step": 4144 }, { "epoch": 0.97, "learning_rate": 1.5790993651370562e-05, "loss": 0.0472, "step": 4145 }, { "epoch": 0.97, "learning_rate": 1.5788927310765432e-05, "loss": 0.0541, "step": 4146 }, { "epoch": 0.97, "learning_rate": 1.5786860598338623e-05, "loss": 0.0347, "step": 4147 }, { "epoch": 0.97, "learning_rate": 1.5784793514222883e-05, "loss": 0.0427, "step": 4148 }, { "epoch": 0.97, "learning_rate": 1.578272605855098e-05, "loss": 0.0382, "step": 4149 }, { "epoch": 0.97, "learning_rate": 1.578065823145571e-05, "loss": 0.0535, "step": 4150 }, { "epoch": 0.97, "learning_rate": 1.577859003306988e-05, "loss": 0.0468, "step": 4151 }, { "epoch": 0.97, "learning_rate": 1.577652146352634e-05, "loss": 0.0332, "step": 4152 }, { "epoch": 0.97, "learning_rate": 1.5774452522957947e-05, "loss": 0.0265, "step": 4153 }, { "epoch": 0.97, "learning_rate": 1.577238321149759e-05, "loss": 0.0479, "step": 4154 }, { "epoch": 0.98, "learning_rate": 1.577031352927818e-05, "loss": 0.0297, "step": 4155 }, { "epoch": 0.98, "learning_rate": 1.5768243476432655e-05, "loss": 0.0202, "step": 4156 }, { "epoch": 0.98, "learning_rate": 1.576617305309397e-05, "loss": 0.0044, "step": 4157 }, { "epoch": 0.98, "learning_rate": 1.5764102259395118e-05, "loss": 0.0133, "step": 4158 }, { "epoch": 0.98, "learning_rate": 1.576203109546909e-05, "loss": 0.0434, "step": 4159 }, { "epoch": 0.98, "learning_rate": 1.575995956144893e-05, "loss": 0.0077, "step": 4160 }, { "epoch": 0.98, "learning_rate": 1.5757887657467687e-05, "loss": 0.0413, "step": 4161 }, { "epoch": 0.98, "learning_rate": 1.5755815383658434e-05, "loss": 0.0862, "step": 4162 }, { "epoch": 0.98, "learning_rate": 1.575374274015428e-05, "loss": 0.0214, "step": 4163 }, { "epoch": 0.98, "learning_rate": 1.5751669727088348e-05, "loss": 0.0177, "step": 4164 }, { "epoch": 0.98, "learning_rate": 1.5749596344593787e-05, "loss": 0.1193, "step": 4165 }, { "epoch": 0.98, "learning_rate": 1.574752259280377e-05, "loss": 0.0059, "step": 4166 }, { "epoch": 0.98, "learning_rate": 1.5745448471851492e-05, "loss": 0.0357, "step": 4167 }, { "epoch": 0.98, "learning_rate": 1.574337398187018e-05, "loss": 0.0356, "step": 4168 }, { "epoch": 0.98, "learning_rate": 1.574129912299307e-05, "loss": 0.1277, "step": 4169 }, { "epoch": 0.98, "learning_rate": 1.5739223895353435e-05, "loss": 0.0601, "step": 4170 }, { "epoch": 0.98, "learning_rate": 1.5737148299084562e-05, "loss": 0.0217, "step": 4171 }, { "epoch": 0.98, "learning_rate": 1.5735072334319772e-05, "loss": 0.0259, "step": 4172 }, { "epoch": 0.98, "learning_rate": 1.5732996001192398e-05, "loss": 0.0922, "step": 4173 }, { "epoch": 0.98, "learning_rate": 1.5730919299835807e-05, "loss": 0.0602, "step": 4174 }, { "epoch": 0.98, "learning_rate": 1.572884223038338e-05, "loss": 0.0466, "step": 4175 }, { "epoch": 0.98, "learning_rate": 1.5726764792968533e-05, "loss": 0.0398, "step": 4176 }, { "epoch": 0.98, "learning_rate": 1.5724686987724697e-05, "loss": 0.0381, "step": 4177 }, { "epoch": 0.98, "learning_rate": 1.5722608814785325e-05, "loss": 0.111, "step": 4178 }, { "epoch": 0.98, "learning_rate": 1.5720530274283903e-05, "loss": 0.0413, "step": 4179 }, { "epoch": 0.98, "learning_rate": 1.571845136635393e-05, "loss": 0.0348, "step": 4180 }, { "epoch": 0.98, "learning_rate": 1.5716372091128942e-05, "loss": 0.0772, "step": 4181 }, { "epoch": 0.98, "learning_rate": 1.5714292448742485e-05, "loss": 0.0048, "step": 4182 }, { "epoch": 0.98, "learning_rate": 1.571221243932813e-05, "loss": 0.0635, "step": 4183 }, { "epoch": 0.98, "learning_rate": 1.5710132063019483e-05, "loss": 0.0141, "step": 4184 }, { "epoch": 0.98, "learning_rate": 1.5708051319950164e-05, "loss": 0.093, "step": 4185 }, { "epoch": 0.98, "learning_rate": 1.5705970210253818e-05, "loss": 0.0378, "step": 4186 }, { "epoch": 0.98, "learning_rate": 1.5703888734064113e-05, "loss": 0.0461, "step": 4187 }, { "epoch": 0.98, "learning_rate": 1.5701806891514746e-05, "loss": 0.0706, "step": 4188 }, { "epoch": 0.98, "learning_rate": 1.569972468273943e-05, "loss": 0.0873, "step": 4189 }, { "epoch": 0.98, "learning_rate": 1.56976421078719e-05, "loss": 0.0827, "step": 4190 }, { "epoch": 0.98, "learning_rate": 1.569555916704593e-05, "loss": 0.0171, "step": 4191 }, { "epoch": 0.98, "learning_rate": 1.5693475860395298e-05, "loss": 0.0827, "step": 4192 }, { "epoch": 0.98, "learning_rate": 1.569139218805382e-05, "loss": 0.0515, "step": 4193 }, { "epoch": 0.98, "learning_rate": 1.568930815015533e-05, "loss": 0.0647, "step": 4194 }, { "epoch": 0.98, "learning_rate": 1.568722374683368e-05, "loss": 0.0776, "step": 4195 }, { "epoch": 0.98, "learning_rate": 1.568513897822276e-05, "loss": 0.0461, "step": 4196 }, { "epoch": 0.99, "learning_rate": 1.5683053844456463e-05, "loss": 0.0962, "step": 4197 }, { "epoch": 0.99, "learning_rate": 1.5680968345668725e-05, "loss": 0.0825, "step": 4198 }, { "epoch": 0.99, "learning_rate": 1.5678882481993488e-05, "loss": 0.1609, "step": 4199 }, { "epoch": 0.99, "learning_rate": 1.5676796253564738e-05, "loss": 0.0338, "step": 4200 }, { "epoch": 0.99, "learning_rate": 1.5674709660516464e-05, "loss": 0.0173, "step": 4201 }, { "epoch": 0.99, "learning_rate": 1.5672622702982696e-05, "loss": 0.0189, "step": 4202 }, { "epoch": 0.99, "learning_rate": 1.567053538109747e-05, "loss": 0.0407, "step": 4203 }, { "epoch": 0.99, "learning_rate": 1.5668447694994862e-05, "loss": 0.0302, "step": 4204 }, { "epoch": 0.99, "learning_rate": 1.5666359644808958e-05, "loss": 0.117, "step": 4205 }, { "epoch": 0.99, "learning_rate": 1.566427123067388e-05, "loss": 0.0567, "step": 4206 }, { "epoch": 0.99, "learning_rate": 1.5662182452723757e-05, "loss": 0.0247, "step": 4207 }, { "epoch": 0.99, "learning_rate": 1.5660093311092755e-05, "loss": 0.0138, "step": 4208 }, { "epoch": 0.99, "learning_rate": 1.5658003805915064e-05, "loss": 0.0351, "step": 4209 }, { "epoch": 0.99, "learning_rate": 1.5655913937324883e-05, "loss": 0.0533, "step": 4210 }, { "epoch": 0.99, "learning_rate": 1.565382370545645e-05, "loss": 0.0829, "step": 4211 }, { "epoch": 0.99, "learning_rate": 1.565173311044402e-05, "loss": 0.0409, "step": 4212 }, { "epoch": 0.99, "learning_rate": 1.5649642152421873e-05, "loss": 0.0122, "step": 4213 }, { "epoch": 0.99, "learning_rate": 1.5647550831524308e-05, "loss": 0.0779, "step": 4214 }, { "epoch": 0.99, "learning_rate": 1.564545914788565e-05, "loss": 0.0224, "step": 4215 }, { "epoch": 0.99, "learning_rate": 1.5643367101640247e-05, "loss": 0.0303, "step": 4216 }, { "epoch": 0.99, "learning_rate": 1.564127469292247e-05, "loss": 0.0169, "step": 4217 }, { "epoch": 0.99, "learning_rate": 1.5639181921866718e-05, "loss": 0.0178, "step": 4218 }, { "epoch": 0.99, "learning_rate": 1.563708878860741e-05, "loss": 0.0587, "step": 4219 }, { "epoch": 0.99, "learning_rate": 1.563499529327898e-05, "loss": 0.0592, "step": 4220 }, { "epoch": 0.99, "learning_rate": 1.5632901436015902e-05, "loss": 0.0324, "step": 4221 }, { "epoch": 0.99, "learning_rate": 1.5630807216952655e-05, "loss": 0.0424, "step": 4222 }, { "epoch": 0.99, "learning_rate": 1.5628712636223754e-05, "loss": 0.087, "step": 4223 }, { "epoch": 0.99, "learning_rate": 1.562661769396374e-05, "loss": 0.045, "step": 4224 }, { "epoch": 0.99, "learning_rate": 1.562452239030716e-05, "loss": 0.0159, "step": 4225 }, { "epoch": 0.99, "learning_rate": 1.5622426725388603e-05, "loss": 0.0224, "step": 4226 }, { "epoch": 0.99, "learning_rate": 1.562033069934267e-05, "loss": 0.0849, "step": 4227 }, { "epoch": 0.99, "learning_rate": 1.5618234312303987e-05, "loss": 0.0778, "step": 4228 }, { "epoch": 0.99, "learning_rate": 1.5616137564407207e-05, "loss": 0.0324, "step": 4229 }, { "epoch": 0.99, "learning_rate": 1.5614040455787002e-05, "loss": 0.0661, "step": 4230 }, { "epoch": 0.99, "learning_rate": 1.561194298657807e-05, "loss": 0.0482, "step": 4231 }, { "epoch": 0.99, "learning_rate": 1.560984515691513e-05, "loss": 0.0148, "step": 4232 }, { "epoch": 0.99, "learning_rate": 1.560774696693293e-05, "loss": 0.0849, "step": 4233 }, { "epoch": 0.99, "learning_rate": 1.5605648416766227e-05, "loss": 0.0086, "step": 4234 }, { "epoch": 0.99, "learning_rate": 1.5603549506549814e-05, "loss": 0.0451, "step": 4235 }, { "epoch": 0.99, "learning_rate": 1.5601450236418506e-05, "loss": 0.0097, "step": 4236 }, { "epoch": 0.99, "learning_rate": 1.5599350606507142e-05, "loss": 0.0263, "step": 4237 }, { "epoch": 0.99, "learning_rate": 1.5597250616950576e-05, "loss": 0.032, "step": 4238 }, { "epoch": 0.99, "learning_rate": 1.5595150267883693e-05, "loss": 0.0083, "step": 4239 }, { "epoch": 1.0, "learning_rate": 1.559304955944139e-05, "loss": 0.042, "step": 4240 }, { "epoch": 1.0, "learning_rate": 1.5590948491758603e-05, "loss": 0.0267, "step": 4241 }, { "epoch": 1.0, "learning_rate": 1.558884706497028e-05, "loss": 0.0133, "step": 4242 }, { "epoch": 1.0, "learning_rate": 1.5586745279211398e-05, "loss": 0.0304, "step": 4243 }, { "epoch": 1.0, "learning_rate": 1.558464313461695e-05, "loss": 0.0248, "step": 4244 }, { "epoch": 1.0, "learning_rate": 1.558254063132196e-05, "loss": 0.0833, "step": 4245 }, { "epoch": 1.0, "learning_rate": 1.5580437769461472e-05, "loss": 0.0801, "step": 4246 }, { "epoch": 1.0, "learning_rate": 1.557833454917055e-05, "loss": 0.0573, "step": 4247 }, { "epoch": 1.0, "learning_rate": 1.5576230970584284e-05, "loss": 0.0108, "step": 4248 }, { "epoch": 1.0, "learning_rate": 1.5574127033837788e-05, "loss": 0.0815, "step": 4249 }, { "epoch": 1.0, "learning_rate": 1.5572022739066194e-05, "loss": 0.0434, "step": 4250 }, { "epoch": 1.0, "learning_rate": 1.5569918086404663e-05, "loss": 0.0208, "step": 4251 }, { "epoch": 1.0, "learning_rate": 1.5567813075988372e-05, "loss": 0.0739, "step": 4252 }, { "epoch": 1.0, "learning_rate": 1.5565707707952534e-05, "loss": 0.0243, "step": 4253 }, { "epoch": 1.0, "learning_rate": 1.5563601982432367e-05, "loss": 0.0274, "step": 4254 }, { "epoch": 1.0, "learning_rate": 1.5561495899563132e-05, "loss": 0.0625, "step": 4255 }, { "epoch": 1.0, "learning_rate": 1.5559389459480092e-05, "loss": 0.0392, "step": 4256 }, { "epoch": 1.0, "learning_rate": 1.5557282662318546e-05, "loss": 0.0805, "step": 4257 }, { "epoch": 1.0, "learning_rate": 1.555517550821382e-05, "loss": 0.0319, "step": 4258 }, { "epoch": 1.0, "learning_rate": 1.555306799730125e-05, "loss": 0.0519, "step": 4259 }, { "epoch": 1.0, "learning_rate": 1.5550960129716196e-05, "loss": 0.0406, "step": 4260 }, { "epoch": 1.0, "learning_rate": 1.5548851905594053e-05, "loss": 0.0826, "step": 4261 }, { "epoch": 1.0, "learning_rate": 1.5546743325070234e-05, "loss": 0.0618, "step": 4262 }, { "epoch": 1.0, "learning_rate": 1.5544634388280167e-05, "loss": 0.048, "step": 4263 }, { "epoch": 1.0, "learning_rate": 1.5542525095359312e-05, "loss": 0.1698, "step": 4264 }, { "epoch": 1.0, "learning_rate": 1.554041544644315e-05, "loss": 0.0609, "step": 4265 }, { "epoch": 1.0, "learning_rate": 1.5538305441667177e-05, "loss": 0.0418, "step": 4266 }, { "epoch": 1.0, "learning_rate": 1.5536195081166924e-05, "loss": 0.0698, "step": 4267 }, { "epoch": 1.0, "learning_rate": 1.5534084365077936e-05, "loss": 0.0542, "step": 4268 }, { "epoch": 1.0, "learning_rate": 1.5531973293535784e-05, "loss": 0.0825, "step": 4269 }, { "epoch": 1.0, "learning_rate": 1.552986186667606e-05, "loss": 0.0368, "step": 4270 }, { "epoch": 1.0, "learning_rate": 1.5527750084634384e-05, "loss": 0.0873, "step": 4271 }, { "epoch": 1.0, "learning_rate": 1.5525637947546397e-05, "loss": 0.0115, "step": 4272 }, { "epoch": 1.0, "learning_rate": 1.552352545554776e-05, "loss": 0.1199, "step": 4273 }, { "epoch": 1.0, "learning_rate": 1.5521412608774153e-05, "loss": 0.0532, "step": 4274 }, { "epoch": 1.0, "learning_rate": 1.551929940736129e-05, "loss": 0.0816, "step": 4275 }, { "epoch": 1.0, "learning_rate": 1.5517185851444895e-05, "loss": 0.1266, "step": 4276 }, { "epoch": 1.0, "learning_rate": 1.5515071941160728e-05, "loss": 0.0278, "step": 4277 }, { "epoch": 1.0, "learning_rate": 1.551295767664456e-05, "loss": 0.0659, "step": 4278 }, { "epoch": 1.0, "learning_rate": 1.5510843058032194e-05, "loss": 0.0153, "step": 4279 }, { "epoch": 1.0, "learning_rate": 1.5508728085459447e-05, "loss": 0.0274, "step": 4280 }, { "epoch": 1.0, "learning_rate": 1.5506612759062166e-05, "loss": 0.1419, "step": 4281 }, { "epoch": 1.0, "learning_rate": 1.550449707897622e-05, "loss": 0.0464, "step": 4282 }, { "epoch": 1.01, "learning_rate": 1.5502381045337497e-05, "loss": 0.0531, "step": 4283 }, { "epoch": 1.01, "learning_rate": 1.5500264658281907e-05, "loss": 0.0572, "step": 4284 }, { "epoch": 1.01, "learning_rate": 1.5498147917945385e-05, "loss": 0.0338, "step": 4285 }, { "epoch": 1.01, "learning_rate": 1.5496030824463894e-05, "loss": 0.0388, "step": 4286 }, { "epoch": 1.01, "learning_rate": 1.549391337797341e-05, "loss": 0.0418, "step": 4287 }, { "epoch": 1.01, "learning_rate": 1.5491795578609937e-05, "loss": 0.124, "step": 4288 }, { "epoch": 1.01, "learning_rate": 1.54896774265095e-05, "loss": 0.0451, "step": 4289 }, { "epoch": 1.01, "learning_rate": 1.548755892180815e-05, "loss": 0.0521, "step": 4290 }, { "epoch": 1.01, "learning_rate": 1.5485440064641958e-05, "loss": 0.1111, "step": 4291 }, { "epoch": 1.01, "learning_rate": 1.5483320855147014e-05, "loss": 0.0561, "step": 4292 }, { "epoch": 1.01, "learning_rate": 1.5481201293459443e-05, "loss": 0.0928, "step": 4293 }, { "epoch": 1.01, "learning_rate": 1.5479081379715374e-05, "loss": 0.0534, "step": 4294 }, { "epoch": 1.01, "learning_rate": 1.547696111405097e-05, "loss": 0.0419, "step": 4295 }, { "epoch": 1.01, "learning_rate": 1.5474840496602423e-05, "loss": 0.1179, "step": 4296 }, { "epoch": 1.01, "learning_rate": 1.5472719527505933e-05, "loss": 0.0186, "step": 4297 }, { "epoch": 1.01, "learning_rate": 1.547059820689773e-05, "loss": 0.0195, "step": 4298 }, { "epoch": 1.01, "learning_rate": 1.546847653491407e-05, "loss": 0.0423, "step": 4299 }, { "epoch": 1.01, "learning_rate": 1.5466354511691223e-05, "loss": 0.0179, "step": 4300 }, { "epoch": 1.01, "learning_rate": 1.5464232137365492e-05, "loss": 0.039, "step": 4301 }, { "epoch": 1.01, "learning_rate": 1.5462109412073187e-05, "loss": 0.0485, "step": 4302 }, { "epoch": 1.01, "learning_rate": 1.5459986335950658e-05, "loss": 0.0398, "step": 4303 }, { "epoch": 1.01, "learning_rate": 1.5457862909134268e-05, "loss": 0.0216, "step": 4304 }, { "epoch": 1.01, "learning_rate": 1.545573913176041e-05, "loss": 0.0373, "step": 4305 }, { "epoch": 1.01, "learning_rate": 1.5453615003965478e-05, "loss": 0.0629, "step": 4306 }, { "epoch": 1.01, "learning_rate": 1.545149052588592e-05, "loss": 0.0105, "step": 4307 }, { "epoch": 1.01, "learning_rate": 1.5449365697658185e-05, "loss": 0.0472, "step": 4308 }, { "epoch": 1.01, "learning_rate": 1.544724051941875e-05, "loss": 0.042, "step": 4309 }, { "epoch": 1.01, "learning_rate": 1.5445114991304112e-05, "loss": 0.0125, "step": 4310 }, { "epoch": 1.01, "learning_rate": 1.5442989113450798e-05, "loss": 0.0361, "step": 4311 }, { "epoch": 1.01, "learning_rate": 1.544086288599535e-05, "loss": 0.0928, "step": 4312 }, { "epoch": 1.01, "learning_rate": 1.543873630907434e-05, "loss": 0.0083, "step": 4313 }, { "epoch": 1.01, "learning_rate": 1.543660938282435e-05, "loss": 0.1017, "step": 4314 }, { "epoch": 1.01, "learning_rate": 1.5434482107381996e-05, "loss": 0.028, "step": 4315 }, { "epoch": 1.01, "learning_rate": 1.5432354482883914e-05, "loss": 0.0104, "step": 4316 }, { "epoch": 1.01, "learning_rate": 1.5430226509466758e-05, "loss": 0.0298, "step": 4317 }, { "epoch": 1.01, "learning_rate": 1.5428098187267214e-05, "loss": 0.0403, "step": 4318 }, { "epoch": 1.01, "learning_rate": 1.5425969516421974e-05, "loss": 0.0309, "step": 4319 }, { "epoch": 1.01, "learning_rate": 1.5423840497067762e-05, "loss": 0.0306, "step": 4320 }, { "epoch": 1.01, "learning_rate": 1.5421711129341337e-05, "loss": 0.0337, "step": 4321 }, { "epoch": 1.01, "learning_rate": 1.5419581413379456e-05, "loss": 0.0264, "step": 4322 }, { "epoch": 1.01, "learning_rate": 1.5417451349318914e-05, "loss": 0.0679, "step": 4323 }, { "epoch": 1.01, "learning_rate": 1.5415320937296528e-05, "loss": 0.0232, "step": 4324 }, { "epoch": 1.02, "learning_rate": 1.5413190177449126e-05, "loss": 0.0118, "step": 4325 }, { "epoch": 1.02, "learning_rate": 1.541105906991357e-05, "loss": 0.0821, "step": 4326 }, { "epoch": 1.02, "learning_rate": 1.5408927614826747e-05, "loss": 0.1871, "step": 4327 }, { "epoch": 1.02, "learning_rate": 1.540679581232555e-05, "loss": 0.0368, "step": 4328 }, { "epoch": 1.02, "learning_rate": 1.5404663662546912e-05, "loss": 0.1353, "step": 4329 }, { "epoch": 1.02, "learning_rate": 1.5402531165627777e-05, "loss": 0.0442, "step": 4330 }, { "epoch": 1.02, "learning_rate": 1.5400398321705116e-05, "loss": 0.1138, "step": 4331 }, { "epoch": 1.02, "learning_rate": 1.5398265130915917e-05, "loss": 0.0447, "step": 4332 }, { "epoch": 1.02, "learning_rate": 1.53961315933972e-05, "loss": 0.0188, "step": 4333 }, { "epoch": 1.02, "learning_rate": 1.5393997709285997e-05, "loss": 0.0195, "step": 4334 }, { "epoch": 1.02, "learning_rate": 1.5391863478719372e-05, "loss": 0.03, "step": 4335 }, { "epoch": 1.02, "learning_rate": 1.5389728901834403e-05, "loss": 0.0685, "step": 4336 }, { "epoch": 1.02, "learning_rate": 1.5387593978768193e-05, "loss": 0.0198, "step": 4337 }, { "epoch": 1.02, "learning_rate": 1.538545870965787e-05, "loss": 0.0392, "step": 4338 }, { "epoch": 1.02, "learning_rate": 1.5383323094640582e-05, "loss": 0.0855, "step": 4339 }, { "epoch": 1.02, "learning_rate": 1.5381187133853495e-05, "loss": 0.0444, "step": 4340 }, { "epoch": 1.02, "learning_rate": 1.5379050827433807e-05, "loss": 0.0497, "step": 4341 }, { "epoch": 1.02, "learning_rate": 1.537691417551873e-05, "loss": 0.0377, "step": 4342 }, { "epoch": 1.02, "learning_rate": 1.5374777178245498e-05, "loss": 0.059, "step": 4343 }, { "epoch": 1.02, "learning_rate": 1.5372639835751372e-05, "loss": 0.033, "step": 4344 }, { "epoch": 1.02, "learning_rate": 1.5370502148173635e-05, "loss": 0.0133, "step": 4345 }, { "epoch": 1.02, "learning_rate": 1.5368364115649588e-05, "loss": 0.0913, "step": 4346 }, { "epoch": 1.02, "learning_rate": 1.5366225738316557e-05, "loss": 0.027, "step": 4347 }, { "epoch": 1.02, "learning_rate": 1.536408701631189e-05, "loss": 0.0586, "step": 4348 }, { "epoch": 1.02, "learning_rate": 1.5361947949772956e-05, "loss": 0.0152, "step": 4349 }, { "epoch": 1.02, "learning_rate": 1.535980853883715e-05, "loss": 0.0293, "step": 4350 }, { "epoch": 1.02, "learning_rate": 1.5357668783641882e-05, "loss": 0.0218, "step": 4351 }, { "epoch": 1.02, "learning_rate": 1.535552868432459e-05, "loss": 0.0572, "step": 4352 }, { "epoch": 1.02, "learning_rate": 1.5353388241022725e-05, "loss": 0.0249, "step": 4353 }, { "epoch": 1.02, "learning_rate": 1.5351247453873784e-05, "loss": 0.1525, "step": 4354 }, { "epoch": 1.02, "learning_rate": 1.534910632301525e-05, "loss": 0.1118, "step": 4355 }, { "epoch": 1.02, "learning_rate": 1.5346964848584663e-05, "loss": 0.0898, "step": 4356 }, { "epoch": 1.02, "learning_rate": 1.534482303071956e-05, "loss": 0.0122, "step": 4357 }, { "epoch": 1.02, "learning_rate": 1.534268086955751e-05, "loss": 0.0387, "step": 4358 }, { "epoch": 1.02, "learning_rate": 1.534053836523611e-05, "loss": 0.0458, "step": 4359 }, { "epoch": 1.02, "learning_rate": 1.5338395517892968e-05, "loss": 0.0814, "step": 4360 }, { "epoch": 1.02, "learning_rate": 1.5336252327665717e-05, "loss": 0.0281, "step": 4361 }, { "epoch": 1.02, "learning_rate": 1.5334108794692018e-05, "loss": 0.0181, "step": 4362 }, { "epoch": 1.02, "learning_rate": 1.5331964919109545e-05, "loss": 0.0443, "step": 4363 }, { "epoch": 1.02, "learning_rate": 1.5329820701056006e-05, "loss": 0.0533, "step": 4364 }, { "epoch": 1.02, "learning_rate": 1.532767614066912e-05, "loss": 0.0056, "step": 4365 }, { "epoch": 1.02, "learning_rate": 1.5325531238086625e-05, "loss": 0.055, "step": 4366 }, { "epoch": 1.02, "learning_rate": 1.5323385993446297e-05, "loss": 0.0246, "step": 4367 }, { "epoch": 1.03, "learning_rate": 1.532124040688592e-05, "loss": 0.0255, "step": 4368 }, { "epoch": 1.03, "learning_rate": 1.531909447854331e-05, "loss": 0.0462, "step": 4369 }, { "epoch": 1.03, "learning_rate": 1.5316948208556293e-05, "loss": 0.0292, "step": 4370 }, { "epoch": 1.03, "learning_rate": 1.5314801597062724e-05, "loss": 0.0417, "step": 4371 }, { "epoch": 1.03, "learning_rate": 1.5312654644200484e-05, "loss": 0.077, "step": 4372 }, { "epoch": 1.03, "learning_rate": 1.5310507350107466e-05, "loss": 0.0626, "step": 4373 }, { "epoch": 1.03, "learning_rate": 1.5308359714921593e-05, "loss": 0.0266, "step": 4374 }, { "epoch": 1.03, "learning_rate": 1.530621173878081e-05, "loss": 0.0555, "step": 4375 }, { "epoch": 1.03, "learning_rate": 1.5304063421823075e-05, "loss": 0.0404, "step": 4376 }, { "epoch": 1.03, "learning_rate": 1.5301914764186377e-05, "loss": 0.0079, "step": 4377 }, { "epoch": 1.03, "learning_rate": 1.529976576600873e-05, "loss": 0.0704, "step": 4378 }, { "epoch": 1.03, "learning_rate": 1.529761642742815e-05, "loss": 0.0429, "step": 4379 }, { "epoch": 1.03, "learning_rate": 1.5295466748582707e-05, "loss": 0.019, "step": 4380 }, { "epoch": 1.03, "learning_rate": 1.5293316729610452e-05, "loss": 0.0168, "step": 4381 }, { "epoch": 1.03, "learning_rate": 1.52911663706495e-05, "loss": 0.0482, "step": 4382 }, { "epoch": 1.03, "learning_rate": 1.5289015671837955e-05, "loss": 0.0782, "step": 4383 }, { "epoch": 1.03, "learning_rate": 1.5286864633313964e-05, "loss": 0.0214, "step": 4384 }, { "epoch": 1.03, "learning_rate": 1.5284713255215687e-05, "loss": 0.0325, "step": 4385 }, { "epoch": 1.03, "learning_rate": 1.52825615376813e-05, "loss": 0.0276, "step": 4386 }, { "epoch": 1.03, "learning_rate": 1.528040948084902e-05, "loss": 0.0411, "step": 4387 }, { "epoch": 1.03, "learning_rate": 1.5278257084857056e-05, "loss": 0.0212, "step": 4388 }, { "epoch": 1.03, "learning_rate": 1.5276104349843667e-05, "loss": 0.0284, "step": 4389 }, { "epoch": 1.03, "learning_rate": 1.5273951275947124e-05, "loss": 0.0796, "step": 4390 }, { "epoch": 1.03, "learning_rate": 1.5271797863305718e-05, "loss": 0.0407, "step": 4391 }, { "epoch": 1.03, "learning_rate": 1.5269644112057757e-05, "loss": 0.0596, "step": 4392 }, { "epoch": 1.03, "learning_rate": 1.5267490022341577e-05, "loss": 0.0347, "step": 4393 }, { "epoch": 1.03, "learning_rate": 1.5265335594295538e-05, "loss": 0.0541, "step": 4394 }, { "epoch": 1.03, "learning_rate": 1.526318082805802e-05, "loss": 0.049, "step": 4395 }, { "epoch": 1.03, "learning_rate": 1.5261025723767414e-05, "loss": 0.0327, "step": 4396 }, { "epoch": 1.03, "learning_rate": 1.5258870281562153e-05, "loss": 0.1225, "step": 4397 }, { "epoch": 1.03, "learning_rate": 1.5256714501580674e-05, "loss": 0.0471, "step": 4398 }, { "epoch": 1.03, "learning_rate": 1.5254558383961442e-05, "loss": 0.0415, "step": 4399 }, { "epoch": 1.03, "learning_rate": 1.5252401928842951e-05, "loss": 0.0472, "step": 4400 }, { "epoch": 1.03, "learning_rate": 1.5250245136363702e-05, "loss": 0.0836, "step": 4401 }, { "epoch": 1.03, "learning_rate": 1.5248088006662229e-05, "loss": 0.0337, "step": 4402 }, { "epoch": 1.03, "learning_rate": 1.5245930539877086e-05, "loss": 0.0152, "step": 4403 }, { "epoch": 1.03, "learning_rate": 1.524377273614684e-05, "loss": 0.0264, "step": 4404 }, { "epoch": 1.03, "learning_rate": 1.524161459561009e-05, "loss": 0.0109, "step": 4405 }, { "epoch": 1.03, "learning_rate": 1.5239456118405453e-05, "loss": 0.0233, "step": 4406 }, { "epoch": 1.03, "learning_rate": 1.523729730467157e-05, "loss": 0.0398, "step": 4407 }, { "epoch": 1.03, "learning_rate": 1.52351381545471e-05, "loss": 0.036, "step": 4408 }, { "epoch": 1.03, "learning_rate": 1.5232978668170722e-05, "loss": 0.0354, "step": 4409 }, { "epoch": 1.03, "learning_rate": 1.5230818845681143e-05, "loss": 0.0351, "step": 4410 }, { "epoch": 1.04, "learning_rate": 1.5228658687217084e-05, "loss": 0.0026, "step": 4411 }, { "epoch": 1.04, "learning_rate": 1.5226498192917294e-05, "loss": 0.0858, "step": 4412 }, { "epoch": 1.04, "learning_rate": 1.522433736292054e-05, "loss": 0.0076, "step": 4413 }, { "epoch": 1.04, "learning_rate": 1.5222176197365616e-05, "loss": 0.0941, "step": 4414 }, { "epoch": 1.04, "learning_rate": 1.5220014696391326e-05, "loss": 0.0548, "step": 4415 }, { "epoch": 1.04, "learning_rate": 1.5217852860136505e-05, "loss": 0.0366, "step": 4416 }, { "epoch": 1.04, "learning_rate": 1.5215690688740013e-05, "loss": 0.0113, "step": 4417 }, { "epoch": 1.04, "learning_rate": 1.5213528182340722e-05, "loss": 0.0701, "step": 4418 }, { "epoch": 1.04, "learning_rate": 1.5211365341077526e-05, "loss": 0.0388, "step": 4419 }, { "epoch": 1.04, "learning_rate": 1.520920216508935e-05, "loss": 0.0854, "step": 4420 }, { "epoch": 1.04, "learning_rate": 1.5207038654515129e-05, "loss": 0.03, "step": 4421 }, { "epoch": 1.04, "learning_rate": 1.5204874809493827e-05, "loss": 0.017, "step": 4422 }, { "epoch": 1.04, "learning_rate": 1.520271063016443e-05, "loss": 0.023, "step": 4423 }, { "epoch": 1.04, "learning_rate": 1.520054611666594e-05, "loss": 0.0479, "step": 4424 }, { "epoch": 1.04, "learning_rate": 1.5198381269137378e-05, "loss": 0.0084, "step": 4425 }, { "epoch": 1.04, "learning_rate": 1.5196216087717806e-05, "loss": 0.0347, "step": 4426 }, { "epoch": 1.04, "learning_rate": 1.5194050572546277e-05, "loss": 0.0615, "step": 4427 }, { "epoch": 1.04, "learning_rate": 1.5191884723761897e-05, "loss": 0.0536, "step": 4428 }, { "epoch": 1.04, "learning_rate": 1.5189718541503764e-05, "loss": 0.0213, "step": 4429 }, { "epoch": 1.04, "learning_rate": 1.518755202591102e-05, "loss": 0.0696, "step": 4430 }, { "epoch": 1.04, "learning_rate": 1.5185385177122815e-05, "loss": 0.0339, "step": 4431 }, { "epoch": 1.04, "learning_rate": 1.5183217995278332e-05, "loss": 0.0198, "step": 4432 }, { "epoch": 1.04, "learning_rate": 1.5181050480516759e-05, "loss": 0.0394, "step": 4433 }, { "epoch": 1.04, "learning_rate": 1.5178882632977327e-05, "loss": 0.0935, "step": 4434 }, { "epoch": 1.04, "learning_rate": 1.5176714452799263e-05, "loss": 0.0331, "step": 4435 }, { "epoch": 1.04, "learning_rate": 1.5174545940121843e-05, "loss": 0.0528, "step": 4436 }, { "epoch": 1.04, "learning_rate": 1.5172377095084338e-05, "loss": 0.0511, "step": 4437 }, { "epoch": 1.04, "learning_rate": 1.517020791782606e-05, "loss": 0.0294, "step": 4438 }, { "epoch": 1.04, "learning_rate": 1.5168038408486331e-05, "loss": 0.1204, "step": 4439 }, { "epoch": 1.04, "learning_rate": 1.51658685672045e-05, "loss": 0.051, "step": 4440 }, { "epoch": 1.04, "learning_rate": 1.5163698394119934e-05, "loss": 0.1119, "step": 4441 }, { "epoch": 1.04, "learning_rate": 1.5161527889372026e-05, "loss": 0.0086, "step": 4442 }, { "epoch": 1.04, "learning_rate": 1.515935705310018e-05, "loss": 0.0389, "step": 4443 }, { "epoch": 1.04, "learning_rate": 1.5157185885443838e-05, "loss": 0.0332, "step": 4444 }, { "epoch": 1.04, "learning_rate": 1.5155014386542447e-05, "loss": 0.0426, "step": 4445 }, { "epoch": 1.04, "learning_rate": 1.5152842556535485e-05, "loss": 0.0103, "step": 4446 }, { "epoch": 1.04, "learning_rate": 1.5150670395562445e-05, "loss": 0.0542, "step": 4447 }, { "epoch": 1.04, "learning_rate": 1.514849790376285e-05, "loss": 0.0354, "step": 4448 }, { "epoch": 1.04, "learning_rate": 1.514632508127623e-05, "loss": 0.0256, "step": 4449 }, { "epoch": 1.04, "learning_rate": 1.5144151928242154e-05, "loss": 0.1035, "step": 4450 }, { "epoch": 1.04, "learning_rate": 1.5141978444800196e-05, "loss": 0.0103, "step": 4451 }, { "epoch": 1.04, "learning_rate": 1.5139804631089967e-05, "loss": 0.1093, "step": 4452 }, { "epoch": 1.05, "learning_rate": 1.513763048725108e-05, "loss": 0.0268, "step": 4453 }, { "epoch": 1.05, "learning_rate": 1.513545601342319e-05, "loss": 0.0734, "step": 4454 }, { "epoch": 1.05, "learning_rate": 1.5133281209745953e-05, "loss": 0.051, "step": 4455 }, { "epoch": 1.05, "learning_rate": 1.5131106076359068e-05, "loss": 0.0297, "step": 4456 }, { "epoch": 1.05, "learning_rate": 1.5128930613402233e-05, "loss": 0.0054, "step": 4457 }, { "epoch": 1.05, "learning_rate": 1.5126754821015181e-05, "loss": 0.0422, "step": 4458 }, { "epoch": 1.05, "learning_rate": 1.5124578699337665e-05, "loss": 0.038, "step": 4459 }, { "epoch": 1.05, "learning_rate": 1.5122402248509453e-05, "loss": 0.0998, "step": 4460 }, { "epoch": 1.05, "learning_rate": 1.5120225468670341e-05, "loss": 0.0405, "step": 4461 }, { "epoch": 1.05, "learning_rate": 1.5118048359960145e-05, "loss": 0.0279, "step": 4462 }, { "epoch": 1.05, "learning_rate": 1.5115870922518694e-05, "loss": 0.0449, "step": 4463 }, { "epoch": 1.05, "learning_rate": 1.5113693156485849e-05, "loss": 0.0482, "step": 4464 }, { "epoch": 1.05, "learning_rate": 1.5111515062001487e-05, "loss": 0.1009, "step": 4465 }, { "epoch": 1.05, "learning_rate": 1.5109336639205505e-05, "loss": 0.0257, "step": 4466 }, { "epoch": 1.05, "learning_rate": 1.5107157888237826e-05, "loss": 0.0162, "step": 4467 }, { "epoch": 1.05, "learning_rate": 1.5104978809238385e-05, "loss": 0.0357, "step": 4468 }, { "epoch": 1.05, "learning_rate": 1.5102799402347148e-05, "loss": 0.0325, "step": 4469 }, { "epoch": 1.05, "learning_rate": 1.51006196677041e-05, "loss": 0.0451, "step": 4470 }, { "epoch": 1.05, "learning_rate": 1.5098439605449238e-05, "loss": 0.0372, "step": 4471 }, { "epoch": 1.05, "learning_rate": 1.5096259215722593e-05, "loss": 0.029, "step": 4472 }, { "epoch": 1.05, "learning_rate": 1.5094078498664208e-05, "loss": 0.0631, "step": 4473 }, { "epoch": 1.05, "learning_rate": 1.5091897454414156e-05, "loss": 0.0456, "step": 4474 }, { "epoch": 1.05, "learning_rate": 1.5089716083112514e-05, "loss": 0.1039, "step": 4475 }, { "epoch": 1.05, "learning_rate": 1.5087534384899398e-05, "loss": 0.0081, "step": 4476 }, { "epoch": 1.05, "learning_rate": 1.5085352359914938e-05, "loss": 0.0202, "step": 4477 }, { "epoch": 1.05, "learning_rate": 1.5083170008299286e-05, "loss": 0.0274, "step": 4478 }, { "epoch": 1.05, "learning_rate": 1.5080987330192609e-05, "loss": 0.0261, "step": 4479 }, { "epoch": 1.05, "learning_rate": 1.5078804325735105e-05, "loss": 0.0279, "step": 4480 }, { "epoch": 1.05, "learning_rate": 1.5076620995066988e-05, "loss": 0.084, "step": 4481 }, { "epoch": 1.05, "learning_rate": 1.5074437338328492e-05, "loss": 0.0376, "step": 4482 }, { "epoch": 1.05, "learning_rate": 1.5072253355659867e-05, "loss": 0.0184, "step": 4483 }, { "epoch": 1.05, "learning_rate": 1.5070069047201397e-05, "loss": 0.0622, "step": 4484 }, { "epoch": 1.05, "learning_rate": 1.5067884413093379e-05, "loss": 0.0249, "step": 4485 }, { "epoch": 1.05, "learning_rate": 1.5065699453476127e-05, "loss": 0.0223, "step": 4486 }, { "epoch": 1.05, "learning_rate": 1.5063514168489985e-05, "loss": 0.0785, "step": 4487 }, { "epoch": 1.05, "learning_rate": 1.5061328558275312e-05, "loss": 0.0341, "step": 4488 }, { "epoch": 1.05, "learning_rate": 1.5059142622972491e-05, "loss": 0.0159, "step": 4489 }, { "epoch": 1.05, "learning_rate": 1.5056956362721922e-05, "loss": 0.0662, "step": 4490 }, { "epoch": 1.05, "learning_rate": 1.5054769777664027e-05, "loss": 0.0512, "step": 4491 }, { "epoch": 1.05, "learning_rate": 1.5052582867939251e-05, "loss": 0.0116, "step": 4492 }, { "epoch": 1.05, "learning_rate": 1.5050395633688062e-05, "loss": 0.0077, "step": 4493 }, { "epoch": 1.05, "learning_rate": 1.504820807505094e-05, "loss": 0.0258, "step": 4494 }, { "epoch": 1.05, "learning_rate": 1.5046020192168395e-05, "loss": 0.0233, "step": 4495 }, { "epoch": 1.06, "learning_rate": 1.5043831985180956e-05, "loss": 0.0188, "step": 4496 }, { "epoch": 1.06, "learning_rate": 1.504164345422917e-05, "loss": 0.0286, "step": 4497 }, { "epoch": 1.06, "learning_rate": 1.5039454599453602e-05, "loss": 0.0514, "step": 4498 }, { "epoch": 1.06, "learning_rate": 1.5037265420994845e-05, "loss": 0.0516, "step": 4499 }, { "epoch": 1.06, "learning_rate": 1.503507591899351e-05, "loss": 0.0505, "step": 4500 }, { "epoch": 1.06, "learning_rate": 1.5032886093590224e-05, "loss": 0.0101, "step": 4501 }, { "epoch": 1.06, "learning_rate": 1.5030695944925646e-05, "loss": 0.0425, "step": 4502 }, { "epoch": 1.06, "learning_rate": 1.5028505473140445e-05, "loss": 0.02, "step": 4503 }, { "epoch": 1.06, "learning_rate": 1.5026314678375312e-05, "loss": 0.0538, "step": 4504 }, { "epoch": 1.06, "learning_rate": 1.5024123560770965e-05, "loss": 0.0081, "step": 4505 }, { "epoch": 1.06, "learning_rate": 1.502193212046814e-05, "loss": 0.0935, "step": 4506 }, { "epoch": 1.06, "learning_rate": 1.5019740357607587e-05, "loss": 0.0561, "step": 4507 }, { "epoch": 1.06, "learning_rate": 1.501754827233009e-05, "loss": 0.0124, "step": 4508 }, { "epoch": 1.06, "learning_rate": 1.5015355864776441e-05, "loss": 0.1175, "step": 4509 }, { "epoch": 1.06, "learning_rate": 1.5013163135087458e-05, "loss": 0.0515, "step": 4510 }, { "epoch": 1.06, "learning_rate": 1.5010970083403986e-05, "loss": 0.0154, "step": 4511 }, { "epoch": 1.06, "learning_rate": 1.5008776709866877e-05, "loss": 0.0978, "step": 4512 }, { "epoch": 1.06, "learning_rate": 1.5006583014617012e-05, "loss": 0.1268, "step": 4513 }, { "epoch": 1.06, "learning_rate": 1.5004388997795293e-05, "loss": 0.0805, "step": 4514 }, { "epoch": 1.06, "learning_rate": 1.5002194659542642e-05, "loss": 0.0175, "step": 4515 }, { "epoch": 1.06, "learning_rate": 1.5000000000000002e-05, "loss": 0.0305, "step": 4516 }, { "epoch": 1.06, "learning_rate": 1.4997805019308332e-05, "loss": 0.008, "step": 4517 }, { "epoch": 1.06, "learning_rate": 1.4995609717608614e-05, "loss": 0.0709, "step": 4518 }, { "epoch": 1.06, "learning_rate": 1.499341409504186e-05, "loss": 0.0041, "step": 4519 }, { "epoch": 1.06, "learning_rate": 1.4991218151749087e-05, "loss": 0.0238, "step": 4520 }, { "epoch": 1.06, "learning_rate": 1.4989021887871342e-05, "loss": 0.0573, "step": 4521 }, { "epoch": 1.06, "learning_rate": 1.498682530354969e-05, "loss": 0.0344, "step": 4522 }, { "epoch": 1.06, "learning_rate": 1.4984628398925219e-05, "loss": 0.0272, "step": 4523 }, { "epoch": 1.06, "learning_rate": 1.4982431174139036e-05, "loss": 0.0323, "step": 4524 }, { "epoch": 1.06, "learning_rate": 1.4980233629332264e-05, "loss": 0.1207, "step": 4525 }, { "epoch": 1.06, "learning_rate": 1.4978035764646058e-05, "loss": 0.0243, "step": 4526 }, { "epoch": 1.06, "learning_rate": 1.4975837580221576e-05, "loss": 0.0485, "step": 4527 }, { "epoch": 1.06, "learning_rate": 1.4973639076200017e-05, "loss": 0.0538, "step": 4528 }, { "epoch": 1.06, "learning_rate": 1.4971440252722586e-05, "loss": 0.0752, "step": 4529 }, { "epoch": 1.06, "learning_rate": 1.4969241109930515e-05, "loss": 0.0736, "step": 4530 }, { "epoch": 1.06, "learning_rate": 1.4967041647965053e-05, "loss": 0.0813, "step": 4531 }, { "epoch": 1.06, "learning_rate": 1.496484186696747e-05, "loss": 0.018, "step": 4532 }, { "epoch": 1.06, "learning_rate": 1.496264176707906e-05, "loss": 0.0255, "step": 4533 }, { "epoch": 1.06, "learning_rate": 1.4960441348441135e-05, "loss": 0.0484, "step": 4534 }, { "epoch": 1.06, "learning_rate": 1.4958240611195024e-05, "loss": 0.0996, "step": 4535 }, { "epoch": 1.06, "learning_rate": 1.495603955548208e-05, "loss": 0.0113, "step": 4536 }, { "epoch": 1.06, "learning_rate": 1.4953838181443683e-05, "loss": 0.0425, "step": 4537 }, { "epoch": 1.07, "learning_rate": 1.495163648922122e-05, "loss": 0.0178, "step": 4538 }, { "epoch": 1.07, "learning_rate": 1.494943447895611e-05, "loss": 0.0263, "step": 4539 }, { "epoch": 1.07, "learning_rate": 1.4947232150789783e-05, "loss": 0.0485, "step": 4540 }, { "epoch": 1.07, "learning_rate": 1.4945029504863694e-05, "loss": 0.125, "step": 4541 }, { "epoch": 1.07, "learning_rate": 1.4942826541319327e-05, "loss": 0.0147, "step": 4542 }, { "epoch": 1.07, "learning_rate": 1.4940623260298168e-05, "loss": 0.0047, "step": 4543 }, { "epoch": 1.07, "learning_rate": 1.4938419661941738e-05, "loss": 0.0111, "step": 4544 }, { "epoch": 1.07, "learning_rate": 1.4936215746391573e-05, "loss": 0.0453, "step": 4545 }, { "epoch": 1.07, "learning_rate": 1.4934011513789228e-05, "loss": 0.023, "step": 4546 }, { "epoch": 1.07, "learning_rate": 1.4931806964276288e-05, "loss": 0.0759, "step": 4547 }, { "epoch": 1.07, "learning_rate": 1.492960209799434e-05, "loss": 0.1302, "step": 4548 }, { "epoch": 1.07, "learning_rate": 1.492739691508501e-05, "loss": 0.0273, "step": 4549 }, { "epoch": 1.07, "learning_rate": 1.4925191415689933e-05, "loss": 0.0778, "step": 4550 }, { "epoch": 1.07, "learning_rate": 1.4922985599950765e-05, "loss": 0.0674, "step": 4551 }, { "epoch": 1.07, "learning_rate": 1.4920779468009192e-05, "loss": 0.0049, "step": 4552 }, { "epoch": 1.07, "learning_rate": 1.491857302000691e-05, "loss": 0.0674, "step": 4553 }, { "epoch": 1.07, "learning_rate": 1.491636625608564e-05, "loss": 0.0712, "step": 4554 }, { "epoch": 1.07, "learning_rate": 1.491415917638712e-05, "loss": 0.0666, "step": 4555 }, { "epoch": 1.07, "learning_rate": 1.4911951781053109e-05, "loss": 0.0242, "step": 4556 }, { "epoch": 1.07, "learning_rate": 1.4909744070225392e-05, "loss": 0.0431, "step": 4557 }, { "epoch": 1.07, "learning_rate": 1.4907536044045767e-05, "loss": 0.0932, "step": 4558 }, { "epoch": 1.07, "learning_rate": 1.4905327702656054e-05, "loss": 0.0239, "step": 4559 }, { "epoch": 1.07, "learning_rate": 1.49031190461981e-05, "loss": 0.013, "step": 4560 }, { "epoch": 1.07, "learning_rate": 1.4900910074813759e-05, "loss": 0.0296, "step": 4561 }, { "epoch": 1.07, "learning_rate": 1.4898700788644915e-05, "loss": 0.0512, "step": 4562 }, { "epoch": 1.07, "learning_rate": 1.4896491187833477e-05, "loss": 0.0167, "step": 4563 }, { "epoch": 1.07, "learning_rate": 1.4894281272521359e-05, "loss": 0.0574, "step": 4564 }, { "epoch": 1.07, "learning_rate": 1.4892071042850504e-05, "loss": 0.0685, "step": 4565 }, { "epoch": 1.07, "learning_rate": 1.4889860498962877e-05, "loss": 0.0502, "step": 4566 }, { "epoch": 1.07, "learning_rate": 1.4887649641000464e-05, "loss": 0.042, "step": 4567 }, { "epoch": 1.07, "learning_rate": 1.4885438469105263e-05, "loss": 0.0108, "step": 4568 }, { "epoch": 1.07, "learning_rate": 1.4883226983419295e-05, "loss": 0.0238, "step": 4569 }, { "epoch": 1.07, "learning_rate": 1.4881015184084611e-05, "loss": 0.0101, "step": 4570 }, { "epoch": 1.07, "learning_rate": 1.4878803071243269e-05, "loss": 0.0178, "step": 4571 }, { "epoch": 1.07, "learning_rate": 1.4876590645037351e-05, "loss": 0.013, "step": 4572 }, { "epoch": 1.07, "learning_rate": 1.487437790560897e-05, "loss": 0.0615, "step": 4573 }, { "epoch": 1.07, "learning_rate": 1.4872164853100239e-05, "loss": 0.0373, "step": 4574 }, { "epoch": 1.07, "learning_rate": 1.4869951487653306e-05, "loss": 0.0312, "step": 4575 }, { "epoch": 1.07, "learning_rate": 1.4867737809410339e-05, "loss": 0.073, "step": 4576 }, { "epoch": 1.07, "learning_rate": 1.4865523818513514e-05, "loss": 0.0286, "step": 4577 }, { "epoch": 1.07, "learning_rate": 1.4863309515105042e-05, "loss": 0.0075, "step": 4578 }, { "epoch": 1.07, "learning_rate": 1.4861094899327146e-05, "loss": 0.0375, "step": 4579 }, { "epoch": 1.07, "learning_rate": 1.4858879971322066e-05, "loss": 0.0686, "step": 4580 }, { "epoch": 1.08, "learning_rate": 1.4856664731232076e-05, "loss": 0.0954, "step": 4581 }, { "epoch": 1.08, "learning_rate": 1.4854449179199448e-05, "loss": 0.0232, "step": 4582 }, { "epoch": 1.08, "learning_rate": 1.4852233315366497e-05, "loss": 0.093, "step": 4583 }, { "epoch": 1.08, "learning_rate": 1.4850017139875541e-05, "loss": 0.0297, "step": 4584 }, { "epoch": 1.08, "learning_rate": 1.4847800652868927e-05, "loss": 0.022, "step": 4585 }, { "epoch": 1.08, "learning_rate": 1.4845583854489018e-05, "loss": 0.0559, "step": 4586 }, { "epoch": 1.08, "learning_rate": 1.48433667448782e-05, "loss": 0.0382, "step": 4587 }, { "epoch": 1.08, "learning_rate": 1.4841149324178879e-05, "loss": 0.1078, "step": 4588 }, { "epoch": 1.08, "learning_rate": 1.4838931592533476e-05, "loss": 0.0367, "step": 4589 }, { "epoch": 1.08, "learning_rate": 1.4836713550084438e-05, "loss": 0.0848, "step": 4590 }, { "epoch": 1.08, "learning_rate": 1.483449519697423e-05, "loss": 0.016, "step": 4591 }, { "epoch": 1.08, "learning_rate": 1.4832276533345333e-05, "loss": 0.0989, "step": 4592 }, { "epoch": 1.08, "learning_rate": 1.4830057559340256e-05, "loss": 0.0092, "step": 4593 }, { "epoch": 1.08, "learning_rate": 1.4827838275101517e-05, "loss": 0.0557, "step": 4594 }, { "epoch": 1.08, "learning_rate": 1.4825618680771666e-05, "loss": 0.0612, "step": 4595 }, { "epoch": 1.08, "learning_rate": 1.4823398776493264e-05, "loss": 0.0622, "step": 4596 }, { "epoch": 1.08, "learning_rate": 1.4821178562408897e-05, "loss": 0.0284, "step": 4597 }, { "epoch": 1.08, "learning_rate": 1.4818958038661167e-05, "loss": 0.0104, "step": 4598 }, { "epoch": 1.08, "learning_rate": 1.4816737205392699e-05, "loss": 0.0203, "step": 4599 }, { "epoch": 1.08, "learning_rate": 1.4814516062746136e-05, "loss": 0.0223, "step": 4600 }, { "epoch": 1.08, "learning_rate": 1.4812294610864145e-05, "loss": 0.0374, "step": 4601 }, { "epoch": 1.08, "learning_rate": 1.4810072849889402e-05, "loss": 0.0283, "step": 4602 }, { "epoch": 1.08, "learning_rate": 1.4807850779964618e-05, "loss": 0.024, "step": 4603 }, { "epoch": 1.08, "learning_rate": 1.480562840123251e-05, "loss": 0.0742, "step": 4604 }, { "epoch": 1.08, "learning_rate": 1.4803405713835827e-05, "loss": 0.0491, "step": 4605 }, { "epoch": 1.08, "learning_rate": 1.4801182717917325e-05, "loss": 0.0225, "step": 4606 }, { "epoch": 1.08, "learning_rate": 1.4798959413619796e-05, "loss": 0.0745, "step": 4607 }, { "epoch": 1.08, "learning_rate": 1.479673580108603e-05, "loss": 0.0157, "step": 4608 }, { "epoch": 1.08, "learning_rate": 1.479451188045886e-05, "loss": 0.0299, "step": 4609 }, { "epoch": 1.08, "learning_rate": 1.479228765188112e-05, "loss": 0.0556, "step": 4610 }, { "epoch": 1.08, "learning_rate": 1.4790063115495678e-05, "loss": 0.029, "step": 4611 }, { "epoch": 1.08, "learning_rate": 1.4787838271445411e-05, "loss": 0.094, "step": 4612 }, { "epoch": 1.08, "learning_rate": 1.4785613119873226e-05, "loss": 0.0083, "step": 4613 }, { "epoch": 1.08, "learning_rate": 1.4783387660922036e-05, "loss": 0.0474, "step": 4614 }, { "epoch": 1.08, "learning_rate": 1.478116189473479e-05, "loss": 0.0605, "step": 4615 }, { "epoch": 1.08, "learning_rate": 1.4778935821454444e-05, "loss": 0.0381, "step": 4616 }, { "epoch": 1.08, "learning_rate": 1.4776709441223977e-05, "loss": 0.1219, "step": 4617 }, { "epoch": 1.08, "learning_rate": 1.477448275418639e-05, "loss": 0.0365, "step": 4618 }, { "epoch": 1.08, "learning_rate": 1.4772255760484709e-05, "loss": 0.0262, "step": 4619 }, { "epoch": 1.08, "learning_rate": 1.4770028460261963e-05, "loss": 0.0461, "step": 4620 }, { "epoch": 1.08, "learning_rate": 1.4767800853661217e-05, "loss": 0.0417, "step": 4621 }, { "epoch": 1.08, "learning_rate": 1.4765572940825547e-05, "loss": 0.0371, "step": 4622 }, { "epoch": 1.08, "learning_rate": 1.4763344721898056e-05, "loss": 0.0018, "step": 4623 }, { "epoch": 1.09, "learning_rate": 1.4761116197021861e-05, "loss": 0.0154, "step": 4624 }, { "epoch": 1.09, "learning_rate": 1.4758887366340096e-05, "loss": 0.0591, "step": 4625 }, { "epoch": 1.09, "learning_rate": 1.4756658229995919e-05, "loss": 0.0396, "step": 4626 }, { "epoch": 1.09, "learning_rate": 1.4754428788132511e-05, "loss": 0.0871, "step": 4627 }, { "epoch": 1.09, "learning_rate": 1.4752199040893063e-05, "loss": 0.0917, "step": 4628 }, { "epoch": 1.09, "learning_rate": 1.4749968988420795e-05, "loss": 0.0322, "step": 4629 }, { "epoch": 1.09, "learning_rate": 1.4747738630858941e-05, "loss": 0.0739, "step": 4630 }, { "epoch": 1.09, "learning_rate": 1.4745507968350761e-05, "loss": 0.0305, "step": 4631 }, { "epoch": 1.09, "learning_rate": 1.4743277001039522e-05, "loss": 0.0631, "step": 4632 }, { "epoch": 1.09, "learning_rate": 1.4741045729068529e-05, "loss": 0.0472, "step": 4633 }, { "epoch": 1.09, "learning_rate": 1.4738814152581085e-05, "loss": 0.0865, "step": 4634 }, { "epoch": 1.09, "learning_rate": 1.4736582271720532e-05, "loss": 0.0514, "step": 4635 }, { "epoch": 1.09, "learning_rate": 1.4734350086630217e-05, "loss": 0.0055, "step": 4636 }, { "epoch": 1.09, "learning_rate": 1.473211759745352e-05, "loss": 0.1166, "step": 4637 }, { "epoch": 1.09, "learning_rate": 1.4729884804333826e-05, "loss": 0.0267, "step": 4638 }, { "epoch": 1.09, "learning_rate": 1.4727651707414552e-05, "loss": 0.0397, "step": 4639 }, { "epoch": 1.09, "learning_rate": 1.4725418306839126e-05, "loss": 0.0789, "step": 4640 }, { "epoch": 1.09, "learning_rate": 1.4723184602751001e-05, "loss": 0.0491, "step": 4641 }, { "epoch": 1.09, "learning_rate": 1.4720950595293647e-05, "loss": 0.0584, "step": 4642 }, { "epoch": 1.09, "learning_rate": 1.4718716284610557e-05, "loss": 0.0237, "step": 4643 }, { "epoch": 1.09, "learning_rate": 1.4716481670845232e-05, "loss": 0.0522, "step": 4644 }, { "epoch": 1.09, "learning_rate": 1.471424675414121e-05, "loss": 0.1107, "step": 4645 }, { "epoch": 1.09, "learning_rate": 1.4712011534642034e-05, "loss": 0.0399, "step": 4646 }, { "epoch": 1.09, "learning_rate": 1.470977601249127e-05, "loss": 0.0386, "step": 4647 }, { "epoch": 1.09, "learning_rate": 1.470754018783251e-05, "loss": 0.0486, "step": 4648 }, { "epoch": 1.09, "learning_rate": 1.4705304060809362e-05, "loss": 0.0094, "step": 4649 }, { "epoch": 1.09, "learning_rate": 1.4703067631565444e-05, "loss": 0.0798, "step": 4650 }, { "epoch": 1.09, "learning_rate": 1.470083090024441e-05, "loss": 0.094, "step": 4651 }, { "epoch": 1.09, "learning_rate": 1.4698593866989917e-05, "loss": 0.0391, "step": 4652 }, { "epoch": 1.09, "learning_rate": 1.4696356531945654e-05, "loss": 0.0209, "step": 4653 }, { "epoch": 1.09, "learning_rate": 1.4694118895255325e-05, "loss": 0.0378, "step": 4654 }, { "epoch": 1.09, "learning_rate": 1.4691880957062652e-05, "loss": 0.0152, "step": 4655 }, { "epoch": 1.09, "learning_rate": 1.4689642717511375e-05, "loss": 0.1174, "step": 4656 }, { "epoch": 1.09, "learning_rate": 1.468740417674526e-05, "loss": 0.0139, "step": 4657 }, { "epoch": 1.09, "learning_rate": 1.4685165334908083e-05, "loss": 0.0153, "step": 4658 }, { "epoch": 1.09, "learning_rate": 1.468292619214365e-05, "loss": 0.0147, "step": 4659 }, { "epoch": 1.09, "learning_rate": 1.4680686748595774e-05, "loss": 0.0114, "step": 4660 }, { "epoch": 1.09, "learning_rate": 1.4678447004408304e-05, "loss": 0.0304, "step": 4661 }, { "epoch": 1.09, "learning_rate": 1.4676206959725085e-05, "loss": 0.0631, "step": 4662 }, { "epoch": 1.09, "learning_rate": 1.4673966614690006e-05, "loss": 0.0914, "step": 4663 }, { "epoch": 1.09, "learning_rate": 1.4671725969446957e-05, "loss": 0.0247, "step": 4664 }, { "epoch": 1.09, "learning_rate": 1.466948502413986e-05, "loss": 0.0173, "step": 4665 }, { "epoch": 1.1, "learning_rate": 1.4667243778912645e-05, "loss": 0.0842, "step": 4666 }, { "epoch": 1.1, "learning_rate": 1.4665002233909271e-05, "loss": 0.0675, "step": 4667 }, { "epoch": 1.1, "learning_rate": 1.4662760389273708e-05, "loss": 0.0377, "step": 4668 }, { "epoch": 1.1, "learning_rate": 1.4660518245149955e-05, "loss": 0.0584, "step": 4669 }, { "epoch": 1.1, "learning_rate": 1.4658275801682019e-05, "loss": 0.0377, "step": 4670 }, { "epoch": 1.1, "learning_rate": 1.4656033059013933e-05, "loss": 0.0303, "step": 4671 }, { "epoch": 1.1, "learning_rate": 1.4653790017289746e-05, "loss": 0.1548, "step": 4672 }, { "epoch": 1.1, "learning_rate": 1.4651546676653535e-05, "loss": 0.0224, "step": 4673 }, { "epoch": 1.1, "learning_rate": 1.464930303724938e-05, "loss": 0.0615, "step": 4674 }, { "epoch": 1.1, "learning_rate": 1.46470590992214e-05, "loss": 0.0166, "step": 4675 }, { "epoch": 1.1, "learning_rate": 1.4644814862713715e-05, "loss": 0.036, "step": 4676 }, { "epoch": 1.1, "learning_rate": 1.4642570327870476e-05, "loss": 0.0398, "step": 4677 }, { "epoch": 1.1, "learning_rate": 1.4640325494835843e-05, "loss": 0.0324, "step": 4678 }, { "epoch": 1.1, "learning_rate": 1.4638080363754008e-05, "loss": 0.0262, "step": 4679 }, { "epoch": 1.1, "learning_rate": 1.4635834934769173e-05, "loss": 0.0705, "step": 4680 }, { "epoch": 1.1, "learning_rate": 1.4633589208025559e-05, "loss": 0.0404, "step": 4681 }, { "epoch": 1.1, "learning_rate": 1.463134318366741e-05, "loss": 0.0448, "step": 4682 }, { "epoch": 1.1, "learning_rate": 1.4629096861838991e-05, "loss": 0.1234, "step": 4683 }, { "epoch": 1.1, "learning_rate": 1.462685024268458e-05, "loss": 0.1224, "step": 4684 }, { "epoch": 1.1, "learning_rate": 1.4624603326348478e-05, "loss": 0.0492, "step": 4685 }, { "epoch": 1.1, "learning_rate": 1.4622356112975e-05, "loss": 0.0832, "step": 4686 }, { "epoch": 1.1, "learning_rate": 1.4620108602708492e-05, "loss": 0.1031, "step": 4687 }, { "epoch": 1.1, "learning_rate": 1.4617860795693301e-05, "loss": 0.0729, "step": 4688 }, { "epoch": 1.1, "learning_rate": 1.4615612692073813e-05, "loss": 0.1474, "step": 4689 }, { "epoch": 1.1, "learning_rate": 1.4613364291994411e-05, "loss": 0.0147, "step": 4690 }, { "epoch": 1.1, "learning_rate": 1.4611115595599525e-05, "loss": 0.0285, "step": 4691 }, { "epoch": 1.1, "learning_rate": 1.4608866603033577e-05, "loss": 0.0501, "step": 4692 }, { "epoch": 1.1, "learning_rate": 1.4606617314441027e-05, "loss": 0.0224, "step": 4693 }, { "epoch": 1.1, "learning_rate": 1.4604367729966338e-05, "loss": 0.0626, "step": 4694 }, { "epoch": 1.1, "learning_rate": 1.4602117849754007e-05, "loss": 0.044, "step": 4695 }, { "epoch": 1.1, "learning_rate": 1.4599867673948538e-05, "loss": 0.039, "step": 4696 }, { "epoch": 1.1, "learning_rate": 1.4597617202694465e-05, "loss": 0.0907, "step": 4697 }, { "epoch": 1.1, "learning_rate": 1.4595366436136329e-05, "loss": 0.0423, "step": 4698 }, { "epoch": 1.1, "learning_rate": 1.4593115374418705e-05, "loss": 0.0637, "step": 4699 }, { "epoch": 1.1, "learning_rate": 1.4590864017686168e-05, "loss": 0.0819, "step": 4700 }, { "epoch": 1.1, "learning_rate": 1.4588612366083333e-05, "loss": 0.0494, "step": 4701 }, { "epoch": 1.1, "learning_rate": 1.4586360419754813e-05, "loss": 0.0426, "step": 4702 }, { "epoch": 1.1, "learning_rate": 1.4584108178845257e-05, "loss": 0.0283, "step": 4703 }, { "epoch": 1.1, "learning_rate": 1.4581855643499321e-05, "loss": 0.0792, "step": 4704 }, { "epoch": 1.1, "learning_rate": 1.4579602813861691e-05, "loss": 0.0359, "step": 4705 }, { "epoch": 1.1, "learning_rate": 1.457734969007706e-05, "loss": 0.1146, "step": 4706 }, { "epoch": 1.1, "learning_rate": 1.4575096272290148e-05, "loss": 0.0386, "step": 4707 }, { "epoch": 1.1, "learning_rate": 1.457284256064569e-05, "loss": 0.0694, "step": 4708 }, { "epoch": 1.11, "learning_rate": 1.4570588555288447e-05, "loss": 0.0284, "step": 4709 }, { "epoch": 1.11, "learning_rate": 1.4568334256363185e-05, "loss": 0.0322, "step": 4710 }, { "epoch": 1.11, "learning_rate": 1.4566079664014705e-05, "loss": 0.0131, "step": 4711 }, { "epoch": 1.11, "learning_rate": 1.4563824778387817e-05, "loss": 0.0241, "step": 4712 }, { "epoch": 1.11, "learning_rate": 1.4561569599627347e-05, "loss": 0.0081, "step": 4713 }, { "epoch": 1.11, "learning_rate": 1.4559314127878148e-05, "loss": 0.0324, "step": 4714 }, { "epoch": 1.11, "learning_rate": 1.4557058363285087e-05, "loss": 0.0424, "step": 4715 }, { "epoch": 1.11, "learning_rate": 1.4554802305993057e-05, "loss": 0.121, "step": 4716 }, { "epoch": 1.11, "learning_rate": 1.4552545956146959e-05, "loss": 0.0356, "step": 4717 }, { "epoch": 1.11, "learning_rate": 1.4550289313891718e-05, "loss": 0.0472, "step": 4718 }, { "epoch": 1.11, "learning_rate": 1.4548032379372284e-05, "loss": 0.0457, "step": 4719 }, { "epoch": 1.11, "learning_rate": 1.4545775152733607e-05, "loss": 0.0284, "step": 4720 }, { "epoch": 1.11, "learning_rate": 1.454351763412068e-05, "loss": 0.0302, "step": 4721 }, { "epoch": 1.11, "learning_rate": 1.4541259823678496e-05, "loss": 0.0297, "step": 4722 }, { "epoch": 1.11, "learning_rate": 1.4539001721552077e-05, "loss": 0.0482, "step": 4723 }, { "epoch": 1.11, "learning_rate": 1.4536743327886456e-05, "loss": 0.0171, "step": 4724 }, { "epoch": 1.11, "learning_rate": 1.4534484642826696e-05, "loss": 0.0659, "step": 4725 }, { "epoch": 1.11, "learning_rate": 1.453222566651787e-05, "loss": 0.0364, "step": 4726 }, { "epoch": 1.11, "learning_rate": 1.4529966399105069e-05, "loss": 0.0075, "step": 4727 }, { "epoch": 1.11, "learning_rate": 1.4527706840733404e-05, "loss": 0.0839, "step": 4728 }, { "epoch": 1.11, "learning_rate": 1.4525446991548014e-05, "loss": 0.0658, "step": 4729 }, { "epoch": 1.11, "learning_rate": 1.452318685169404e-05, "loss": 0.0311, "step": 4730 }, { "epoch": 1.11, "learning_rate": 1.4520926421316654e-05, "loss": 0.0042, "step": 4731 }, { "epoch": 1.11, "learning_rate": 1.4518665700561042e-05, "loss": 0.0043, "step": 4732 }, { "epoch": 1.11, "learning_rate": 1.451640468957241e-05, "loss": 0.0275, "step": 4733 }, { "epoch": 1.11, "learning_rate": 1.4514143388495981e-05, "loss": 0.0503, "step": 4734 }, { "epoch": 1.11, "learning_rate": 1.4511881797477004e-05, "loss": 0.0456, "step": 4735 }, { "epoch": 1.11, "learning_rate": 1.4509619916660734e-05, "loss": 0.0211, "step": 4736 }, { "epoch": 1.11, "learning_rate": 1.4507357746192452e-05, "loss": 0.0731, "step": 4737 }, { "epoch": 1.11, "learning_rate": 1.4505095286217458e-05, "loss": 0.0333, "step": 4738 }, { "epoch": 1.11, "learning_rate": 1.4502832536881072e-05, "loss": 0.0163, "step": 4739 }, { "epoch": 1.11, "learning_rate": 1.4500569498328622e-05, "loss": 0.0766, "step": 4740 }, { "epoch": 1.11, "learning_rate": 1.4498306170705472e-05, "loss": 0.0745, "step": 4741 }, { "epoch": 1.11, "learning_rate": 1.4496042554156989e-05, "loss": 0.03, "step": 4742 }, { "epoch": 1.11, "learning_rate": 1.4493778648828569e-05, "loss": 0.025, "step": 4743 }, { "epoch": 1.11, "learning_rate": 1.4491514454865615e-05, "loss": 0.0128, "step": 4744 }, { "epoch": 1.11, "learning_rate": 1.4489249972413565e-05, "loss": 0.0125, "step": 4745 }, { "epoch": 1.11, "learning_rate": 1.4486985201617861e-05, "loss": 0.0408, "step": 4746 }, { "epoch": 1.11, "learning_rate": 1.4484720142623972e-05, "loss": 0.0696, "step": 4747 }, { "epoch": 1.11, "learning_rate": 1.4482454795577374e-05, "loss": 0.0461, "step": 4748 }, { "epoch": 1.11, "learning_rate": 1.4480189160623582e-05, "loss": 0.0235, "step": 4749 }, { "epoch": 1.11, "learning_rate": 1.4477923237908103e-05, "loss": 0.0472, "step": 4750 }, { "epoch": 1.12, "learning_rate": 1.4475657027576491e-05, "loss": 0.0298, "step": 4751 }, { "epoch": 1.12, "learning_rate": 1.4473390529774294e-05, "loss": 0.048, "step": 4752 }, { "epoch": 1.12, "learning_rate": 1.4471123744647097e-05, "loss": 0.0159, "step": 4753 }, { "epoch": 1.12, "learning_rate": 1.4468856672340487e-05, "loss": 0.0214, "step": 4754 }, { "epoch": 1.12, "learning_rate": 1.4466589313000083e-05, "loss": 0.0167, "step": 4755 }, { "epoch": 1.12, "learning_rate": 1.4464321666771514e-05, "loss": 0.038, "step": 4756 }, { "epoch": 1.12, "learning_rate": 1.4462053733800436e-05, "loss": 0.0176, "step": 4757 }, { "epoch": 1.12, "learning_rate": 1.4459785514232509e-05, "loss": 0.0836, "step": 4758 }, { "epoch": 1.12, "learning_rate": 1.445751700821343e-05, "loss": 0.0399, "step": 4759 }, { "epoch": 1.12, "learning_rate": 1.4455248215888894e-05, "loss": 0.0096, "step": 4760 }, { "epoch": 1.12, "learning_rate": 1.4452979137404637e-05, "loss": 0.1492, "step": 4761 }, { "epoch": 1.12, "learning_rate": 1.445070977290639e-05, "loss": 0.0448, "step": 4762 }, { "epoch": 1.12, "learning_rate": 1.4448440122539924e-05, "loss": 0.0044, "step": 4763 }, { "epoch": 1.12, "learning_rate": 1.4446170186451012e-05, "loss": 0.0038, "step": 4764 }, { "epoch": 1.12, "learning_rate": 1.4443899964785453e-05, "loss": 0.0286, "step": 4765 }, { "epoch": 1.12, "learning_rate": 1.444162945768906e-05, "loss": 0.0572, "step": 4766 }, { "epoch": 1.12, "learning_rate": 1.4439358665307673e-05, "loss": 0.0096, "step": 4767 }, { "epoch": 1.12, "learning_rate": 1.443708758778714e-05, "loss": 0.0483, "step": 4768 }, { "epoch": 1.12, "learning_rate": 1.4434816225273335e-05, "loss": 0.0089, "step": 4769 }, { "epoch": 1.12, "learning_rate": 1.4432544577912147e-05, "loss": 0.0248, "step": 4770 }, { "epoch": 1.12, "learning_rate": 1.4430272645849481e-05, "loss": 0.1009, "step": 4771 }, { "epoch": 1.12, "learning_rate": 1.442800042923126e-05, "loss": 0.0388, "step": 4772 }, { "epoch": 1.12, "learning_rate": 1.4425727928203439e-05, "loss": 0.0382, "step": 4773 }, { "epoch": 1.12, "learning_rate": 1.4423455142911968e-05, "loss": 0.0122, "step": 4774 }, { "epoch": 1.12, "learning_rate": 1.4421182073502835e-05, "loss": 0.0722, "step": 4775 }, { "epoch": 1.12, "learning_rate": 1.4418908720122035e-05, "loss": 0.074, "step": 4776 }, { "epoch": 1.12, "learning_rate": 1.441663508291559e-05, "loss": 0.0963, "step": 4777 }, { "epoch": 1.12, "learning_rate": 1.4414361162029531e-05, "loss": 0.0292, "step": 4778 }, { "epoch": 1.12, "learning_rate": 1.4412086957609912e-05, "loss": 0.0259, "step": 4779 }, { "epoch": 1.12, "learning_rate": 1.4409812469802804e-05, "loss": 0.0578, "step": 4780 }, { "epoch": 1.12, "learning_rate": 1.4407537698754299e-05, "loss": 0.0155, "step": 4781 }, { "epoch": 1.12, "learning_rate": 1.4405262644610504e-05, "loss": 0.0108, "step": 4782 }, { "epoch": 1.12, "learning_rate": 1.4402987307517546e-05, "loss": 0.0075, "step": 4783 }, { "epoch": 1.12, "learning_rate": 1.4400711687621566e-05, "loss": 0.042, "step": 4784 }, { "epoch": 1.12, "learning_rate": 1.4398435785068735e-05, "loss": 0.011, "step": 4785 }, { "epoch": 1.12, "learning_rate": 1.4396159600005223e-05, "loss": 0.0395, "step": 4786 }, { "epoch": 1.12, "learning_rate": 1.4393883132577239e-05, "loss": 0.0165, "step": 4787 }, { "epoch": 1.12, "learning_rate": 1.4391606382930991e-05, "loss": 0.0128, "step": 4788 }, { "epoch": 1.12, "learning_rate": 1.4389329351212722e-05, "loss": 0.0542, "step": 4789 }, { "epoch": 1.12, "learning_rate": 1.4387052037568681e-05, "loss": 0.0151, "step": 4790 }, { "epoch": 1.12, "learning_rate": 1.438477444214514e-05, "loss": 0.0041, "step": 4791 }, { "epoch": 1.12, "learning_rate": 1.4382496565088389e-05, "loss": 0.0743, "step": 4792 }, { "epoch": 1.12, "learning_rate": 1.4380218406544733e-05, "loss": 0.006, "step": 4793 }, { "epoch": 1.13, "learning_rate": 1.43779399666605e-05, "loss": 0.0162, "step": 4794 }, { "epoch": 1.13, "learning_rate": 1.4375661245582038e-05, "loss": 0.0097, "step": 4795 }, { "epoch": 1.13, "learning_rate": 1.43733822434557e-05, "loss": 0.0401, "step": 4796 }, { "epoch": 1.13, "learning_rate": 1.4371102960427875e-05, "loss": 0.0402, "step": 4797 }, { "epoch": 1.13, "learning_rate": 1.4368823396644953e-05, "loss": 0.1077, "step": 4798 }, { "epoch": 1.13, "learning_rate": 1.4366543552253354e-05, "loss": 0.0353, "step": 4799 }, { "epoch": 1.13, "learning_rate": 1.4364263427399513e-05, "loss": 0.0172, "step": 4800 }, { "epoch": 1.13, "learning_rate": 1.4361983022229877e-05, "loss": 0.073, "step": 4801 }, { "epoch": 1.13, "learning_rate": 1.435970233689092e-05, "loss": 0.0278, "step": 4802 }, { "epoch": 1.13, "learning_rate": 1.435742137152913e-05, "loss": 0.004, "step": 4803 }, { "epoch": 1.13, "learning_rate": 1.4355140126291012e-05, "loss": 0.1573, "step": 4804 }, { "epoch": 1.13, "learning_rate": 1.4352858601323092e-05, "loss": 0.0366, "step": 4805 }, { "epoch": 1.13, "learning_rate": 1.4350576796771907e-05, "loss": 0.0721, "step": 4806 }, { "epoch": 1.13, "learning_rate": 1.4348294712784024e-05, "loss": 0.0391, "step": 4807 }, { "epoch": 1.13, "learning_rate": 1.4346012349506012e-05, "loss": 0.0472, "step": 4808 }, { "epoch": 1.13, "learning_rate": 1.4343729707084475e-05, "loss": 0.0039, "step": 4809 }, { "epoch": 1.13, "learning_rate": 1.434144678566602e-05, "loss": 0.0122, "step": 4810 }, { "epoch": 1.13, "learning_rate": 1.4339163585397282e-05, "loss": 0.0477, "step": 4811 }, { "epoch": 1.13, "learning_rate": 1.4336880106424911e-05, "loss": 0.0273, "step": 4812 }, { "epoch": 1.13, "learning_rate": 1.4334596348895575e-05, "loss": 0.0463, "step": 4813 }, { "epoch": 1.13, "learning_rate": 1.4332312312955956e-05, "loss": 0.1296, "step": 4814 }, { "epoch": 1.13, "learning_rate": 1.4330027998752761e-05, "loss": 0.0099, "step": 4815 }, { "epoch": 1.13, "learning_rate": 1.4327743406432708e-05, "loss": 0.0547, "step": 4816 }, { "epoch": 1.13, "learning_rate": 1.4325458536142539e-05, "loss": 0.0247, "step": 4817 }, { "epoch": 1.13, "learning_rate": 1.4323173388029007e-05, "loss": 0.0313, "step": 4818 }, { "epoch": 1.13, "learning_rate": 1.4320887962238886e-05, "loss": 0.0861, "step": 4819 }, { "epoch": 1.13, "learning_rate": 1.4318602258918972e-05, "loss": 0.0308, "step": 4820 }, { "epoch": 1.13, "learning_rate": 1.4316316278216079e-05, "loss": 0.0505, "step": 4821 }, { "epoch": 1.13, "learning_rate": 1.4314030020277026e-05, "loss": 0.079, "step": 4822 }, { "epoch": 1.13, "learning_rate": 1.4311743485248663e-05, "loss": 0.0311, "step": 4823 }, { "epoch": 1.13, "learning_rate": 1.4309456673277855e-05, "loss": 0.0297, "step": 4824 }, { "epoch": 1.13, "learning_rate": 1.4307169584511485e-05, "loss": 0.0116, "step": 4825 }, { "epoch": 1.13, "learning_rate": 1.4304882219096446e-05, "loss": 0.032, "step": 4826 }, { "epoch": 1.13, "learning_rate": 1.4302594577179658e-05, "loss": 0.0494, "step": 4827 }, { "epoch": 1.13, "learning_rate": 1.430030665890806e-05, "loss": 0.0602, "step": 4828 }, { "epoch": 1.13, "learning_rate": 1.42980184644286e-05, "loss": 0.0082, "step": 4829 }, { "epoch": 1.13, "learning_rate": 1.4295729993888247e-05, "loss": 0.0646, "step": 4830 }, { "epoch": 1.13, "learning_rate": 1.4293441247433994e-05, "loss": 0.0508, "step": 4831 }, { "epoch": 1.13, "learning_rate": 1.4291152225212844e-05, "loss": 0.0585, "step": 4832 }, { "epoch": 1.13, "learning_rate": 1.428886292737182e-05, "loss": 0.0101, "step": 4833 }, { "epoch": 1.13, "learning_rate": 1.4286573354057964e-05, "loss": 0.0102, "step": 4834 }, { "epoch": 1.13, "learning_rate": 1.4284283505418336e-05, "loss": 0.0248, "step": 4835 }, { "epoch": 1.13, "learning_rate": 1.4281993381600007e-05, "loss": 0.007, "step": 4836 }, { "epoch": 1.14, "learning_rate": 1.4279702982750077e-05, "loss": 0.0221, "step": 4837 }, { "epoch": 1.14, "learning_rate": 1.4277412309015657e-05, "loss": 0.164, "step": 4838 }, { "epoch": 1.14, "learning_rate": 1.4275121360543876e-05, "loss": 0.1085, "step": 4839 }, { "epoch": 1.14, "learning_rate": 1.4272830137481879e-05, "loss": 0.0258, "step": 4840 }, { "epoch": 1.14, "learning_rate": 1.4270538639976835e-05, "loss": 0.006, "step": 4841 }, { "epoch": 1.14, "learning_rate": 1.426824686817592e-05, "loss": 0.0305, "step": 4842 }, { "epoch": 1.14, "learning_rate": 1.4265954822226345e-05, "loss": 0.0654, "step": 4843 }, { "epoch": 1.14, "learning_rate": 1.4263662502275315e-05, "loss": 0.0601, "step": 4844 }, { "epoch": 1.14, "learning_rate": 1.4261369908470074e-05, "loss": 0.0317, "step": 4845 }, { "epoch": 1.14, "learning_rate": 1.4259077040957872e-05, "loss": 0.0892, "step": 4846 }, { "epoch": 1.14, "learning_rate": 1.4256783899885976e-05, "loss": 0.0692, "step": 4847 }, { "epoch": 1.14, "learning_rate": 1.425449048540168e-05, "loss": 0.1007, "step": 4848 }, { "epoch": 1.14, "learning_rate": 1.4252196797652286e-05, "loss": 0.088, "step": 4849 }, { "epoch": 1.14, "learning_rate": 1.4249902836785118e-05, "loss": 0.035, "step": 4850 }, { "epoch": 1.14, "learning_rate": 1.4247608602947521e-05, "loss": 0.0395, "step": 4851 }, { "epoch": 1.14, "learning_rate": 1.4245314096286844e-05, "loss": 0.0606, "step": 4852 }, { "epoch": 1.14, "learning_rate": 1.4243019316950468e-05, "loss": 0.0326, "step": 4853 }, { "epoch": 1.14, "learning_rate": 1.4240724265085788e-05, "loss": 0.0181, "step": 4854 }, { "epoch": 1.14, "learning_rate": 1.4238428940840212e-05, "loss": 0.0152, "step": 4855 }, { "epoch": 1.14, "learning_rate": 1.423613334436117e-05, "loss": 0.0096, "step": 4856 }, { "epoch": 1.14, "learning_rate": 1.4233837475796105e-05, "loss": 0.0137, "step": 4857 }, { "epoch": 1.14, "learning_rate": 1.4231541335292481e-05, "loss": 0.0335, "step": 4858 }, { "epoch": 1.14, "learning_rate": 1.422924492299778e-05, "loss": 0.076, "step": 4859 }, { "epoch": 1.14, "learning_rate": 1.42269482390595e-05, "loss": 0.0163, "step": 4860 }, { "epoch": 1.14, "learning_rate": 1.4224651283625154e-05, "loss": 0.0311, "step": 4861 }, { "epoch": 1.14, "learning_rate": 1.4222354056842283e-05, "loss": 0.049, "step": 4862 }, { "epoch": 1.14, "learning_rate": 1.4220056558858426e-05, "loss": 0.0457, "step": 4863 }, { "epoch": 1.14, "learning_rate": 1.4217758789821159e-05, "loss": 0.0767, "step": 4864 }, { "epoch": 1.14, "learning_rate": 1.421546074987806e-05, "loss": 0.0815, "step": 4865 }, { "epoch": 1.14, "learning_rate": 1.4213162439176741e-05, "loss": 0.0568, "step": 4866 }, { "epoch": 1.14, "learning_rate": 1.4210863857864818e-05, "loss": 0.0219, "step": 4867 }, { "epoch": 1.14, "learning_rate": 1.4208565006089925e-05, "loss": 0.0269, "step": 4868 }, { "epoch": 1.14, "learning_rate": 1.420626588399972e-05, "loss": 0.03, "step": 4869 }, { "epoch": 1.14, "learning_rate": 1.4203966491741876e-05, "loss": 0.0382, "step": 4870 }, { "epoch": 1.14, "learning_rate": 1.420166682946408e-05, "loss": 0.1062, "step": 4871 }, { "epoch": 1.14, "learning_rate": 1.4199366897314041e-05, "loss": 0.0474, "step": 4872 }, { "epoch": 1.14, "learning_rate": 1.4197066695439481e-05, "loss": 0.0534, "step": 4873 }, { "epoch": 1.14, "learning_rate": 1.4194766223988146e-05, "loss": 0.0068, "step": 4874 }, { "epoch": 1.14, "learning_rate": 1.4192465483107789e-05, "loss": 0.0284, "step": 4875 }, { "epoch": 1.14, "learning_rate": 1.4190164472946189e-05, "loss": 0.114, "step": 4876 }, { "epoch": 1.14, "learning_rate": 1.418786319365114e-05, "loss": 0.0375, "step": 4877 }, { "epoch": 1.14, "learning_rate": 1.4185561645370452e-05, "loss": 0.045, "step": 4878 }, { "epoch": 1.15, "learning_rate": 1.418325982825195e-05, "loss": 0.0138, "step": 4879 }, { "epoch": 1.15, "learning_rate": 1.4180957742443487e-05, "loss": 0.0806, "step": 4880 }, { "epoch": 1.15, "learning_rate": 1.4178655388092916e-05, "loss": 0.0435, "step": 4881 }, { "epoch": 1.15, "learning_rate": 1.4176352765348126e-05, "loss": 0.0244, "step": 4882 }, { "epoch": 1.15, "learning_rate": 1.4174049874357008e-05, "loss": 0.0664, "step": 4883 }, { "epoch": 1.15, "learning_rate": 1.4171746715267476e-05, "loss": 0.1, "step": 4884 }, { "epoch": 1.15, "learning_rate": 1.4169443288227466e-05, "loss": 0.039, "step": 4885 }, { "epoch": 1.15, "learning_rate": 1.4167139593384922e-05, "loss": 0.0206, "step": 4886 }, { "epoch": 1.15, "learning_rate": 1.4164835630887813e-05, "loss": 0.0842, "step": 4887 }, { "epoch": 1.15, "learning_rate": 1.416253140088412e-05, "loss": 0.0297, "step": 4888 }, { "epoch": 1.15, "learning_rate": 1.4160226903521844e-05, "loss": 0.0382, "step": 4889 }, { "epoch": 1.15, "learning_rate": 1.4157922138949003e-05, "loss": 0.0198, "step": 4890 }, { "epoch": 1.15, "learning_rate": 1.415561710731363e-05, "loss": 0.0222, "step": 4891 }, { "epoch": 1.15, "learning_rate": 1.415331180876378e-05, "loss": 0.0048, "step": 4892 }, { "epoch": 1.15, "learning_rate": 1.4151006243447518e-05, "loss": 0.1063, "step": 4893 }, { "epoch": 1.15, "learning_rate": 1.4148700411512931e-05, "loss": 0.0354, "step": 4894 }, { "epoch": 1.15, "learning_rate": 1.4146394313108125e-05, "loss": 0.0525, "step": 4895 }, { "epoch": 1.15, "learning_rate": 1.4144087948381215e-05, "loss": 0.0075, "step": 4896 }, { "epoch": 1.15, "learning_rate": 1.4141781317480345e-05, "loss": 0.0294, "step": 4897 }, { "epoch": 1.15, "learning_rate": 1.4139474420553665e-05, "loss": 0.0805, "step": 4898 }, { "epoch": 1.15, "learning_rate": 1.4137167257749346e-05, "loss": 0.026, "step": 4899 }, { "epoch": 1.15, "learning_rate": 1.413485982921558e-05, "loss": 0.0668, "step": 4900 }, { "epoch": 1.15, "learning_rate": 1.413255213510057e-05, "loss": 0.037, "step": 4901 }, { "epoch": 1.15, "learning_rate": 1.4130244175552536e-05, "loss": 0.0155, "step": 4902 }, { "epoch": 1.15, "learning_rate": 1.4127935950719729e-05, "loss": 0.0324, "step": 4903 }, { "epoch": 1.15, "learning_rate": 1.412562746075039e-05, "loss": 0.0216, "step": 4904 }, { "epoch": 1.15, "learning_rate": 1.4123318705792805e-05, "loss": 0.0772, "step": 4905 }, { "epoch": 1.15, "learning_rate": 1.4121009685995262e-05, "loss": 0.0758, "step": 4906 }, { "epoch": 1.15, "learning_rate": 1.4118700401506064e-05, "loss": 0.0561, "step": 4907 }, { "epoch": 1.15, "learning_rate": 1.4116390852473545e-05, "loss": 0.0615, "step": 4908 }, { "epoch": 1.15, "learning_rate": 1.4114081039046038e-05, "loss": 0.0166, "step": 4909 }, { "epoch": 1.15, "learning_rate": 1.4111770961371906e-05, "loss": 0.0104, "step": 4910 }, { "epoch": 1.15, "learning_rate": 1.4109460619599524e-05, "loss": 0.0342, "step": 4911 }, { "epoch": 1.15, "learning_rate": 1.4107150013877284e-05, "loss": 0.0548, "step": 4912 }, { "epoch": 1.15, "learning_rate": 1.4104839144353597e-05, "loss": 0.0163, "step": 4913 }, { "epoch": 1.15, "learning_rate": 1.4102528011176892e-05, "loss": 0.005, "step": 4914 }, { "epoch": 1.15, "learning_rate": 1.410021661449561e-05, "loss": 0.0518, "step": 4915 }, { "epoch": 1.15, "learning_rate": 1.4097904954458211e-05, "loss": 0.0115, "step": 4916 }, { "epoch": 1.15, "learning_rate": 1.4095593031213173e-05, "loss": 0.0473, "step": 4917 }, { "epoch": 1.15, "learning_rate": 1.409328084490899e-05, "loss": 0.0425, "step": 4918 }, { "epoch": 1.15, "learning_rate": 1.4090968395694174e-05, "loss": 0.0154, "step": 4919 }, { "epoch": 1.15, "learning_rate": 1.4088655683717255e-05, "loss": 0.0836, "step": 4920 }, { "epoch": 1.15, "learning_rate": 1.408634270912677e-05, "loss": 0.1807, "step": 4921 }, { "epoch": 1.16, "learning_rate": 1.4084029472071293e-05, "loss": 0.0772, "step": 4922 }, { "epoch": 1.16, "learning_rate": 1.4081715972699393e-05, "loss": 0.0065, "step": 4923 }, { "epoch": 1.16, "learning_rate": 1.4079402211159674e-05, "loss": 0.1208, "step": 4924 }, { "epoch": 1.16, "learning_rate": 1.407708818760074e-05, "loss": 0.0188, "step": 4925 }, { "epoch": 1.16, "learning_rate": 1.4074773902171227e-05, "loss": 0.0209, "step": 4926 }, { "epoch": 1.16, "learning_rate": 1.4072459355019774e-05, "loss": 0.05, "step": 4927 }, { "epoch": 1.16, "learning_rate": 1.4070144546295051e-05, "loss": 0.0558, "step": 4928 }, { "epoch": 1.16, "learning_rate": 1.4067829476145733e-05, "loss": 0.061, "step": 4929 }, { "epoch": 1.16, "learning_rate": 1.406551414472052e-05, "loss": 0.0104, "step": 4930 }, { "epoch": 1.16, "learning_rate": 1.4063198552168123e-05, "loss": 0.0581, "step": 4931 }, { "epoch": 1.16, "learning_rate": 1.4060882698637273e-05, "loss": 0.0545, "step": 4932 }, { "epoch": 1.16, "learning_rate": 1.4058566584276717e-05, "loss": 0.144, "step": 4933 }, { "epoch": 1.16, "learning_rate": 1.4056250209235217e-05, "loss": 0.0169, "step": 4934 }, { "epoch": 1.16, "learning_rate": 1.4053933573661554e-05, "loss": 0.0331, "step": 4935 }, { "epoch": 1.16, "learning_rate": 1.4051616677704527e-05, "loss": 0.0344, "step": 4936 }, { "epoch": 1.16, "learning_rate": 1.4049299521512948e-05, "loss": 0.0555, "step": 4937 }, { "epoch": 1.16, "learning_rate": 1.4046982105235646e-05, "loss": 0.0281, "step": 4938 }, { "epoch": 1.16, "learning_rate": 1.4044664429021469e-05, "loss": 0.0376, "step": 4939 }, { "epoch": 1.16, "learning_rate": 1.4042346493019285e-05, "loss": 0.055, "step": 4940 }, { "epoch": 1.16, "learning_rate": 1.404002829737797e-05, "loss": 0.0283, "step": 4941 }, { "epoch": 1.16, "learning_rate": 1.4037709842246422e-05, "loss": 0.0616, "step": 4942 }, { "epoch": 1.16, "learning_rate": 1.4035391127773556e-05, "loss": 0.0436, "step": 4943 }, { "epoch": 1.16, "learning_rate": 1.4033072154108305e-05, "loss": 0.0379, "step": 4944 }, { "epoch": 1.16, "learning_rate": 1.403075292139961e-05, "loss": 0.0497, "step": 4945 }, { "epoch": 1.16, "learning_rate": 1.4028433429796438e-05, "loss": 0.0484, "step": 4946 }, { "epoch": 1.16, "learning_rate": 1.4026113679447768e-05, "loss": 0.0472, "step": 4947 }, { "epoch": 1.16, "learning_rate": 1.4023793670502603e-05, "loss": 0.0277, "step": 4948 }, { "epoch": 1.16, "learning_rate": 1.4021473403109951e-05, "loss": 0.0377, "step": 4949 }, { "epoch": 1.16, "learning_rate": 1.4019152877418845e-05, "loss": 0.0204, "step": 4950 }, { "epoch": 1.16, "learning_rate": 1.4016832093578328e-05, "loss": 0.0216, "step": 4951 }, { "epoch": 1.16, "learning_rate": 1.401451105173747e-05, "loss": 0.0897, "step": 4952 }, { "epoch": 1.16, "learning_rate": 1.4012189752045344e-05, "loss": 0.0243, "step": 4953 }, { "epoch": 1.16, "learning_rate": 1.4009868194651053e-05, "loss": 0.0117, "step": 4954 }, { "epoch": 1.16, "learning_rate": 1.4007546379703705e-05, "loss": 0.0084, "step": 4955 }, { "epoch": 1.16, "learning_rate": 1.4005224307352432e-05, "loss": 0.0072, "step": 4956 }, { "epoch": 1.16, "learning_rate": 1.400290197774638e-05, "loss": 0.0617, "step": 4957 }, { "epoch": 1.16, "learning_rate": 1.4000579391034712e-05, "loss": 0.0188, "step": 4958 }, { "epoch": 1.16, "learning_rate": 1.3998256547366606e-05, "loss": 0.0107, "step": 4959 }, { "epoch": 1.16, "learning_rate": 1.3995933446891264e-05, "loss": 0.0216, "step": 4960 }, { "epoch": 1.16, "learning_rate": 1.3993610089757888e-05, "loss": 0.0322, "step": 4961 }, { "epoch": 1.16, "learning_rate": 1.3991286476115715e-05, "loss": 0.0813, "step": 4962 }, { "epoch": 1.16, "learning_rate": 1.3988962606113986e-05, "loss": 0.0813, "step": 4963 }, { "epoch": 1.17, "learning_rate": 1.3986638479901964e-05, "loss": 0.0459, "step": 4964 }, { "epoch": 1.17, "learning_rate": 1.3984314097628925e-05, "loss": 0.0075, "step": 4965 }, { "epoch": 1.17, "learning_rate": 1.398198945944417e-05, "loss": 0.0367, "step": 4966 }, { "epoch": 1.17, "learning_rate": 1.3979664565497004e-05, "loss": 0.0021, "step": 4967 }, { "epoch": 1.17, "learning_rate": 1.397733941593676e-05, "loss": 0.0111, "step": 4968 }, { "epoch": 1.17, "learning_rate": 1.3975014010912774e-05, "loss": 0.0111, "step": 4969 }, { "epoch": 1.17, "learning_rate": 1.397268835057441e-05, "loss": 0.0628, "step": 4970 }, { "epoch": 1.17, "learning_rate": 1.3970362435071047e-05, "loss": 0.1034, "step": 4971 }, { "epoch": 1.17, "learning_rate": 1.3968036264552078e-05, "loss": 0.1207, "step": 4972 }, { "epoch": 1.17, "learning_rate": 1.3965709839166905e-05, "loss": 0.0209, "step": 4973 }, { "epoch": 1.17, "learning_rate": 1.3963383159064967e-05, "loss": 0.0449, "step": 4974 }, { "epoch": 1.17, "learning_rate": 1.3961056224395694e-05, "loss": 0.079, "step": 4975 }, { "epoch": 1.17, "learning_rate": 1.3958729035308553e-05, "loss": 0.0188, "step": 4976 }, { "epoch": 1.17, "learning_rate": 1.3956401591953016e-05, "loss": 0.01, "step": 4977 }, { "epoch": 1.17, "learning_rate": 1.395407389447857e-05, "loss": 0.0106, "step": 4978 }, { "epoch": 1.17, "learning_rate": 1.3951745943034727e-05, "loss": 0.0885, "step": 4979 }, { "epoch": 1.17, "learning_rate": 1.3949417737771014e-05, "loss": 0.069, "step": 4980 }, { "epoch": 1.17, "learning_rate": 1.394708927883696e-05, "loss": 0.1181, "step": 4981 }, { "epoch": 1.17, "learning_rate": 1.3944760566382134e-05, "loss": 0.0078, "step": 4982 }, { "epoch": 1.17, "learning_rate": 1.39424316005561e-05, "loss": 0.0915, "step": 4983 }, { "epoch": 1.17, "learning_rate": 1.3940102381508455e-05, "loss": 0.0122, "step": 4984 }, { "epoch": 1.17, "learning_rate": 1.3937772909388798e-05, "loss": 0.0101, "step": 4985 }, { "epoch": 1.17, "learning_rate": 1.3935443184346753e-05, "loss": 0.0271, "step": 4986 }, { "epoch": 1.17, "learning_rate": 1.3933113206531956e-05, "loss": 0.0636, "step": 4987 }, { "epoch": 1.17, "learning_rate": 1.3930782976094064e-05, "loss": 0.0095, "step": 4988 }, { "epoch": 1.17, "learning_rate": 1.3928452493182742e-05, "loss": 0.1126, "step": 4989 }, { "epoch": 1.17, "learning_rate": 1.3926121757947681e-05, "loss": 0.051, "step": 4990 }, { "epoch": 1.17, "learning_rate": 1.392379077053858e-05, "loss": 0.0573, "step": 4991 }, { "epoch": 1.17, "learning_rate": 1.3921459531105165e-05, "loss": 0.1353, "step": 4992 }, { "epoch": 1.17, "learning_rate": 1.3919128039797166e-05, "loss": 0.0037, "step": 4993 }, { "epoch": 1.17, "learning_rate": 1.3916796296764335e-05, "loss": 0.0133, "step": 4994 }, { "epoch": 1.17, "learning_rate": 1.3914464302156436e-05, "loss": 0.0564, "step": 4995 }, { "epoch": 1.17, "learning_rate": 1.3912132056123257e-05, "loss": 0.0215, "step": 4996 }, { "epoch": 1.17, "learning_rate": 1.3909799558814595e-05, "loss": 0.0035, "step": 4997 }, { "epoch": 1.17, "learning_rate": 1.3907466810380272e-05, "loss": 0.063, "step": 4998 }, { "epoch": 1.17, "learning_rate": 1.3905133810970108e-05, "loss": 0.0275, "step": 4999 }, { "epoch": 1.17, "learning_rate": 1.3902800560733965e-05, "loss": 0.0522, "step": 5000 }, { "epoch": 1.17, "learning_rate": 1.3900467059821697e-05, "loss": 0.0245, "step": 5001 }, { "epoch": 1.17, "learning_rate": 1.3898133308383188e-05, "loss": 0.005, "step": 5002 }, { "epoch": 1.17, "learning_rate": 1.3895799306568334e-05, "loss": 0.0154, "step": 5003 }, { "epoch": 1.17, "learning_rate": 1.3893465054527049e-05, "loss": 0.0649, "step": 5004 }, { "epoch": 1.17, "learning_rate": 1.389113055240926e-05, "loss": 0.0891, "step": 5005 }, { "epoch": 1.17, "learning_rate": 1.3888795800364911e-05, "loss": 0.0644, "step": 5006 }, { "epoch": 1.18, "learning_rate": 1.3886460798543962e-05, "loss": 0.0443, "step": 5007 }, { "epoch": 1.18, "learning_rate": 1.3884125547096395e-05, "loss": 0.014, "step": 5008 }, { "epoch": 1.18, "learning_rate": 1.3881790046172197e-05, "loss": 0.0604, "step": 5009 }, { "epoch": 1.18, "learning_rate": 1.387945429592138e-05, "loss": 0.0251, "step": 5010 }, { "epoch": 1.18, "learning_rate": 1.3877118296493963e-05, "loss": 0.0488, "step": 5011 }, { "epoch": 1.18, "learning_rate": 1.3874782048039996e-05, "loss": 0.0138, "step": 5012 }, { "epoch": 1.18, "learning_rate": 1.387244555070953e-05, "loss": 0.0162, "step": 5013 }, { "epoch": 1.18, "learning_rate": 1.387010880465264e-05, "loss": 0.0458, "step": 5014 }, { "epoch": 1.18, "learning_rate": 1.386777181001941e-05, "loss": 0.0245, "step": 5015 }, { "epoch": 1.18, "learning_rate": 1.386543456695995e-05, "loss": 0.1636, "step": 5016 }, { "epoch": 1.18, "learning_rate": 1.3863097075624378e-05, "loss": 0.0513, "step": 5017 }, { "epoch": 1.18, "learning_rate": 1.3860759336162837e-05, "loss": 0.0175, "step": 5018 }, { "epoch": 1.18, "learning_rate": 1.3858421348725469e-05, "loss": 0.0497, "step": 5019 }, { "epoch": 1.18, "learning_rate": 1.3856083113462451e-05, "loss": 0.041, "step": 5020 }, { "epoch": 1.18, "learning_rate": 1.385374463052396e-05, "loss": 0.0724, "step": 5021 }, { "epoch": 1.18, "learning_rate": 1.3851405900060204e-05, "loss": 0.0328, "step": 5022 }, { "epoch": 1.18, "learning_rate": 1.3849066922221392e-05, "loss": 0.0168, "step": 5023 }, { "epoch": 1.18, "learning_rate": 1.3846727697157763e-05, "loss": 0.0106, "step": 5024 }, { "epoch": 1.18, "learning_rate": 1.3844388225019559e-05, "loss": 0.0441, "step": 5025 }, { "epoch": 1.18, "learning_rate": 1.3842048505957049e-05, "loss": 0.0123, "step": 5026 }, { "epoch": 1.18, "learning_rate": 1.3839708540120507e-05, "loss": 0.0345, "step": 5027 }, { "epoch": 1.18, "learning_rate": 1.3837368327660235e-05, "loss": 0.0255, "step": 5028 }, { "epoch": 1.18, "learning_rate": 1.3835027868726538e-05, "loss": 0.0202, "step": 5029 }, { "epoch": 1.18, "learning_rate": 1.3832687163469749e-05, "loss": 0.0351, "step": 5030 }, { "epoch": 1.18, "learning_rate": 1.3830346212040207e-05, "loss": 0.0215, "step": 5031 }, { "epoch": 1.18, "learning_rate": 1.3828005014588275e-05, "loss": 0.0326, "step": 5032 }, { "epoch": 1.18, "learning_rate": 1.3825663571264318e-05, "loss": 0.0307, "step": 5033 }, { "epoch": 1.18, "learning_rate": 1.382332188221874e-05, "loss": 0.0505, "step": 5034 }, { "epoch": 1.18, "learning_rate": 1.3820979947601936e-05, "loss": 0.0088, "step": 5035 }, { "epoch": 1.18, "learning_rate": 1.3818637767564336e-05, "loss": 0.0069, "step": 5036 }, { "epoch": 1.18, "learning_rate": 1.3816295342256374e-05, "loss": 0.0113, "step": 5037 }, { "epoch": 1.18, "learning_rate": 1.3813952671828503e-05, "loss": 0.0552, "step": 5038 }, { "epoch": 1.18, "learning_rate": 1.3811609756431193e-05, "loss": 0.003, "step": 5039 }, { "epoch": 1.18, "learning_rate": 1.380926659621493e-05, "loss": 0.0189, "step": 5040 }, { "epoch": 1.18, "learning_rate": 1.3806923191330212e-05, "loss": 0.0059, "step": 5041 }, { "epoch": 1.18, "learning_rate": 1.3804579541927559e-05, "loss": 0.025, "step": 5042 }, { "epoch": 1.18, "learning_rate": 1.3802235648157498e-05, "loss": 0.0266, "step": 5043 }, { "epoch": 1.18, "learning_rate": 1.3799891510170584e-05, "loss": 0.0527, "step": 5044 }, { "epoch": 1.18, "learning_rate": 1.3797547128117374e-05, "loss": 0.0086, "step": 5045 }, { "epoch": 1.18, "learning_rate": 1.3795202502148455e-05, "loss": 0.0099, "step": 5046 }, { "epoch": 1.18, "learning_rate": 1.3792857632414411e-05, "loss": 0.1104, "step": 5047 }, { "epoch": 1.18, "learning_rate": 1.3790512519065863e-05, "loss": 0.0184, "step": 5048 }, { "epoch": 1.18, "learning_rate": 1.378816716225343e-05, "loss": 0.1171, "step": 5049 }, { "epoch": 1.19, "learning_rate": 1.3785821562127757e-05, "loss": 0.0838, "step": 5050 }, { "epoch": 1.19, "learning_rate": 1.3783475718839499e-05, "loss": 0.0045, "step": 5051 }, { "epoch": 1.19, "learning_rate": 1.3781129632539335e-05, "loss": 0.0774, "step": 5052 }, { "epoch": 1.19, "learning_rate": 1.3778783303377945e-05, "loss": 0.0059, "step": 5053 }, { "epoch": 1.19, "learning_rate": 1.3776436731506041e-05, "loss": 0.0339, "step": 5054 }, { "epoch": 1.19, "learning_rate": 1.377408991707434e-05, "loss": 0.0127, "step": 5055 }, { "epoch": 1.19, "learning_rate": 1.3771742860233577e-05, "loss": 0.0934, "step": 5056 }, { "epoch": 1.19, "learning_rate": 1.37693955611345e-05, "loss": 0.0471, "step": 5057 }, { "epoch": 1.19, "learning_rate": 1.3767048019927882e-05, "loss": 0.0392, "step": 5058 }, { "epoch": 1.19, "learning_rate": 1.3764700236764498e-05, "loss": 0.0384, "step": 5059 }, { "epoch": 1.19, "learning_rate": 1.3762352211795153e-05, "loss": 0.0102, "step": 5060 }, { "epoch": 1.19, "learning_rate": 1.3760003945170654e-05, "loss": 0.0153, "step": 5061 }, { "epoch": 1.19, "learning_rate": 1.3757655437041837e-05, "loss": 0.0355, "step": 5062 }, { "epoch": 1.19, "learning_rate": 1.3755306687559537e-05, "loss": 0.0211, "step": 5063 }, { "epoch": 1.19, "learning_rate": 1.375295769687462e-05, "loss": 0.0196, "step": 5064 }, { "epoch": 1.19, "learning_rate": 1.375060846513796e-05, "loss": 0.0657, "step": 5065 }, { "epoch": 1.19, "learning_rate": 1.3748258992500448e-05, "loss": 0.0479, "step": 5066 }, { "epoch": 1.19, "learning_rate": 1.3745909279112983e-05, "loss": 0.0689, "step": 5067 }, { "epoch": 1.19, "learning_rate": 1.3743559325126498e-05, "loss": 0.0305, "step": 5068 }, { "epoch": 1.19, "learning_rate": 1.3741209130691927e-05, "loss": 0.0053, "step": 5069 }, { "epoch": 1.19, "learning_rate": 1.373885869596022e-05, "loss": 0.0585, "step": 5070 }, { "epoch": 1.19, "learning_rate": 1.3736508021082342e-05, "loss": 0.0364, "step": 5071 }, { "epoch": 1.19, "learning_rate": 1.3734157106209282e-05, "loss": 0.0122, "step": 5072 }, { "epoch": 1.19, "learning_rate": 1.3731805951492039e-05, "loss": 0.0205, "step": 5073 }, { "epoch": 1.19, "learning_rate": 1.3729454557081623e-05, "loss": 0.0277, "step": 5074 }, { "epoch": 1.19, "learning_rate": 1.3727102923129066e-05, "loss": 0.0513, "step": 5075 }, { "epoch": 1.19, "learning_rate": 1.372475104978541e-05, "loss": 0.035, "step": 5076 }, { "epoch": 1.19, "learning_rate": 1.3722398937201717e-05, "loss": 0.0209, "step": 5077 }, { "epoch": 1.19, "learning_rate": 1.372004658552907e-05, "loss": 0.0416, "step": 5078 }, { "epoch": 1.19, "learning_rate": 1.3717693994918549e-05, "loss": 0.0034, "step": 5079 }, { "epoch": 1.19, "learning_rate": 1.3715341165521267e-05, "loss": 0.0413, "step": 5080 }, { "epoch": 1.19, "learning_rate": 1.3712988097488345e-05, "loss": 0.0074, "step": 5081 }, { "epoch": 1.19, "learning_rate": 1.3710634790970918e-05, "loss": 0.0024, "step": 5082 }, { "epoch": 1.19, "learning_rate": 1.370828124612014e-05, "loss": 0.0214, "step": 5083 }, { "epoch": 1.19, "learning_rate": 1.3705927463087179e-05, "loss": 0.0541, "step": 5084 }, { "epoch": 1.19, "learning_rate": 1.3703573442023212e-05, "loss": 0.0087, "step": 5085 }, { "epoch": 1.19, "learning_rate": 1.3701219183079448e-05, "loss": 0.0808, "step": 5086 }, { "epoch": 1.19, "learning_rate": 1.3698864686407091e-05, "loss": 0.0122, "step": 5087 }, { "epoch": 1.19, "learning_rate": 1.369650995215738e-05, "loss": 0.0441, "step": 5088 }, { "epoch": 1.19, "learning_rate": 1.3694154980481549e-05, "loss": 0.0514, "step": 5089 }, { "epoch": 1.19, "learning_rate": 1.3691799771530865e-05, "loss": 0.0062, "step": 5090 }, { "epoch": 1.19, "learning_rate": 1.3689444325456593e-05, "loss": 0.0586, "step": 5091 }, { "epoch": 1.2, "learning_rate": 1.3687088642410035e-05, "loss": 0.0598, "step": 5092 }, { "epoch": 1.2, "learning_rate": 1.3684732722542484e-05, "loss": 0.0152, "step": 5093 }, { "epoch": 1.2, "learning_rate": 1.3682376566005273e-05, "loss": 0.0244, "step": 5094 }, { "epoch": 1.2, "learning_rate": 1.3680020172949726e-05, "loss": 0.0444, "step": 5095 }, { "epoch": 1.2, "learning_rate": 1.3677663543527202e-05, "loss": 0.0265, "step": 5096 }, { "epoch": 1.2, "learning_rate": 1.3675306677889062e-05, "loss": 0.029, "step": 5097 }, { "epoch": 1.2, "learning_rate": 1.367294957618669e-05, "loss": 0.026, "step": 5098 }, { "epoch": 1.2, "learning_rate": 1.3670592238571475e-05, "loss": 0.016, "step": 5099 }, { "epoch": 1.2, "learning_rate": 1.3668234665194842e-05, "loss": 0.0325, "step": 5100 }, { "epoch": 1.2, "learning_rate": 1.3665876856208203e-05, "loss": 0.0074, "step": 5101 }, { "epoch": 1.2, "learning_rate": 1.3663518811763005e-05, "loss": 0.0993, "step": 5102 }, { "epoch": 1.2, "learning_rate": 1.366116053201071e-05, "loss": 0.0506, "step": 5103 }, { "epoch": 1.2, "learning_rate": 1.3658802017102785e-05, "loss": 0.0054, "step": 5104 }, { "epoch": 1.2, "learning_rate": 1.3656443267190715e-05, "loss": 0.0471, "step": 5105 }, { "epoch": 1.2, "learning_rate": 1.3654084282426007e-05, "loss": 0.0058, "step": 5106 }, { "epoch": 1.2, "learning_rate": 1.3651725062960174e-05, "loss": 0.0842, "step": 5107 }, { "epoch": 1.2, "learning_rate": 1.3649365608944753e-05, "loss": 0.0052, "step": 5108 }, { "epoch": 1.2, "learning_rate": 1.3647005920531284e-05, "loss": 0.0228, "step": 5109 }, { "epoch": 1.2, "learning_rate": 1.3644645997871336e-05, "loss": 0.0223, "step": 5110 }, { "epoch": 1.2, "learning_rate": 1.3642285841116482e-05, "loss": 0.0749, "step": 5111 }, { "epoch": 1.2, "learning_rate": 1.363992545041832e-05, "loss": 0.0287, "step": 5112 }, { "epoch": 1.2, "learning_rate": 1.3637564825928448e-05, "loss": 0.053, "step": 5113 }, { "epoch": 1.2, "learning_rate": 1.36352039677985e-05, "loss": 0.0351, "step": 5114 }, { "epoch": 1.2, "learning_rate": 1.3632842876180106e-05, "loss": 0.023, "step": 5115 }, { "epoch": 1.2, "learning_rate": 1.363048155122492e-05, "loss": 0.0315, "step": 5116 }, { "epoch": 1.2, "learning_rate": 1.3628119993084607e-05, "loss": 0.0267, "step": 5117 }, { "epoch": 1.2, "learning_rate": 1.3625758201910855e-05, "loss": 0.1157, "step": 5118 }, { "epoch": 1.2, "learning_rate": 1.3623396177855355e-05, "loss": 0.0654, "step": 5119 }, { "epoch": 1.2, "learning_rate": 1.3621033921069826e-05, "loss": 0.0483, "step": 5120 }, { "epoch": 1.2, "learning_rate": 1.361867143170599e-05, "loss": 0.0536, "step": 5121 }, { "epoch": 1.2, "learning_rate": 1.3616308709915593e-05, "loss": 0.0707, "step": 5122 }, { "epoch": 1.2, "learning_rate": 1.361394575585039e-05, "loss": 0.0147, "step": 5123 }, { "epoch": 1.2, "learning_rate": 1.3611582569662154e-05, "loss": 0.053, "step": 5124 }, { "epoch": 1.2, "learning_rate": 1.3609219151502668e-05, "loss": 0.022, "step": 5125 }, { "epoch": 1.2, "learning_rate": 1.3606855501523741e-05, "loss": 0.0615, "step": 5126 }, { "epoch": 1.2, "learning_rate": 1.3604491619877183e-05, "loss": 0.0339, "step": 5127 }, { "epoch": 1.2, "learning_rate": 1.360212750671483e-05, "loss": 0.0232, "step": 5128 }, { "epoch": 1.2, "learning_rate": 1.3599763162188529e-05, "loss": 0.0182, "step": 5129 }, { "epoch": 1.2, "learning_rate": 1.359739858645014e-05, "loss": 0.0091, "step": 5130 }, { "epoch": 1.2, "learning_rate": 1.3595033779651537e-05, "loss": 0.0078, "step": 5131 }, { "epoch": 1.2, "learning_rate": 1.3592668741944612e-05, "loss": 0.0284, "step": 5132 }, { "epoch": 1.2, "learning_rate": 1.3590303473481274e-05, "loss": 0.0729, "step": 5133 }, { "epoch": 1.2, "learning_rate": 1.3587937974413443e-05, "loss": 0.0862, "step": 5134 }, { "epoch": 1.21, "learning_rate": 1.358557224489305e-05, "loss": 0.0598, "step": 5135 }, { "epoch": 1.21, "learning_rate": 1.358320628507205e-05, "loss": 0.0784, "step": 5136 }, { "epoch": 1.21, "learning_rate": 1.3580840095102406e-05, "loss": 0.0008, "step": 5137 }, { "epoch": 1.21, "learning_rate": 1.35784736751361e-05, "loss": 0.0257, "step": 5138 }, { "epoch": 1.21, "learning_rate": 1.3576107025325122e-05, "loss": 0.0227, "step": 5139 }, { "epoch": 1.21, "learning_rate": 1.357374014582149e-05, "loss": 0.1466, "step": 5140 }, { "epoch": 1.21, "learning_rate": 1.3571373036777218e-05, "loss": 0.0514, "step": 5141 }, { "epoch": 1.21, "learning_rate": 1.3569005698344354e-05, "loss": 0.0432, "step": 5142 }, { "epoch": 1.21, "learning_rate": 1.3566638130674945e-05, "loss": 0.0427, "step": 5143 }, { "epoch": 1.21, "learning_rate": 1.3564270333921063e-05, "loss": 0.0954, "step": 5144 }, { "epoch": 1.21, "learning_rate": 1.3561902308234788e-05, "loss": 0.0063, "step": 5145 }, { "epoch": 1.21, "learning_rate": 1.3559534053768224e-05, "loss": 0.1052, "step": 5146 }, { "epoch": 1.21, "learning_rate": 1.3557165570673477e-05, "loss": 0.0403, "step": 5147 }, { "epoch": 1.21, "learning_rate": 1.3554796859102677e-05, "loss": 0.0676, "step": 5148 }, { "epoch": 1.21, "learning_rate": 1.3552427919207964e-05, "loss": 0.0636, "step": 5149 }, { "epoch": 1.21, "learning_rate": 1.3550058751141499e-05, "loss": 0.0033, "step": 5150 }, { "epoch": 1.21, "learning_rate": 1.354768935505545e-05, "loss": 0.0365, "step": 5151 }, { "epoch": 1.21, "learning_rate": 1.3545319731102002e-05, "loss": 0.0203, "step": 5152 }, { "epoch": 1.21, "learning_rate": 1.3542949879433358e-05, "loss": 0.0362, "step": 5153 }, { "epoch": 1.21, "learning_rate": 1.354057980020173e-05, "loss": 0.0194, "step": 5154 }, { "epoch": 1.21, "learning_rate": 1.3538209493559349e-05, "loss": 0.0109, "step": 5155 }, { "epoch": 1.21, "learning_rate": 1.3535838959658464e-05, "loss": 0.0304, "step": 5156 }, { "epoch": 1.21, "learning_rate": 1.3533468198651324e-05, "loss": 0.0618, "step": 5157 }, { "epoch": 1.21, "learning_rate": 1.3531097210690213e-05, "loss": 0.042, "step": 5158 }, { "epoch": 1.21, "learning_rate": 1.3528725995927414e-05, "loss": 0.0813, "step": 5159 }, { "epoch": 1.21, "learning_rate": 1.352635455451523e-05, "loss": 0.0561, "step": 5160 }, { "epoch": 1.21, "learning_rate": 1.3523982886605975e-05, "loss": 0.0438, "step": 5161 }, { "epoch": 1.21, "learning_rate": 1.3521610992351985e-05, "loss": 0.0334, "step": 5162 }, { "epoch": 1.21, "learning_rate": 1.3519238871905609e-05, "loss": 0.0898, "step": 5163 }, { "epoch": 1.21, "learning_rate": 1.35168665254192e-05, "loss": 0.0561, "step": 5164 }, { "epoch": 1.21, "learning_rate": 1.351449395304514e-05, "loss": 0.0338, "step": 5165 }, { "epoch": 1.21, "learning_rate": 1.3512121154935817e-05, "loss": 0.0054, "step": 5166 }, { "epoch": 1.21, "learning_rate": 1.3509748131243634e-05, "loss": 0.0217, "step": 5167 }, { "epoch": 1.21, "learning_rate": 1.3507374882121012e-05, "loss": 0.0922, "step": 5168 }, { "epoch": 1.21, "learning_rate": 1.350500140772038e-05, "loss": 0.0149, "step": 5169 }, { "epoch": 1.21, "learning_rate": 1.3502627708194188e-05, "loss": 0.105, "step": 5170 }, { "epoch": 1.21, "learning_rate": 1.3500253783694903e-05, "loss": 0.0851, "step": 5171 }, { "epoch": 1.21, "learning_rate": 1.3497879634374995e-05, "loss": 0.0281, "step": 5172 }, { "epoch": 1.21, "learning_rate": 1.3495505260386956e-05, "loss": 0.0511, "step": 5173 }, { "epoch": 1.21, "learning_rate": 1.34931306618833e-05, "loss": 0.0048, "step": 5174 }, { "epoch": 1.21, "learning_rate": 1.3490755839016534e-05, "loss": 0.0071, "step": 5175 }, { "epoch": 1.21, "learning_rate": 1.3488380791939203e-05, "loss": 0.0403, "step": 5176 }, { "epoch": 1.22, "learning_rate": 1.3486005520803848e-05, "loss": 0.0392, "step": 5177 }, { "epoch": 1.22, "learning_rate": 1.3483630025763042e-05, "loss": 0.0389, "step": 5178 }, { "epoch": 1.22, "learning_rate": 1.3481254306969351e-05, "loss": 0.08, "step": 5179 }, { "epoch": 1.22, "learning_rate": 1.3478878364575375e-05, "loss": 0.0041, "step": 5180 }, { "epoch": 1.22, "learning_rate": 1.3476502198733717e-05, "loss": 0.0324, "step": 5181 }, { "epoch": 1.22, "learning_rate": 1.3474125809596997e-05, "loss": 0.0421, "step": 5182 }, { "epoch": 1.22, "learning_rate": 1.3471749197317852e-05, "loss": 0.0855, "step": 5183 }, { "epoch": 1.22, "learning_rate": 1.3469372362048931e-05, "loss": 0.0477, "step": 5184 }, { "epoch": 1.22, "learning_rate": 1.3466995303942897e-05, "loss": 0.0115, "step": 5185 }, { "epoch": 1.22, "learning_rate": 1.3464618023152432e-05, "loss": 0.0398, "step": 5186 }, { "epoch": 1.22, "learning_rate": 1.346224051983022e-05, "loss": 0.0515, "step": 5187 }, { "epoch": 1.22, "learning_rate": 1.3459862794128973e-05, "loss": 0.0303, "step": 5188 }, { "epoch": 1.22, "learning_rate": 1.3457484846201414e-05, "loss": 0.0291, "step": 5189 }, { "epoch": 1.22, "learning_rate": 1.3455106676200274e-05, "loss": 0.0483, "step": 5190 }, { "epoch": 1.22, "learning_rate": 1.3452728284278302e-05, "loss": 0.0675, "step": 5191 }, { "epoch": 1.22, "learning_rate": 1.3450349670588266e-05, "loss": 0.0499, "step": 5192 }, { "epoch": 1.22, "learning_rate": 1.344797083528294e-05, "loss": 0.039, "step": 5193 }, { "epoch": 1.22, "learning_rate": 1.3445591778515116e-05, "loss": 0.0427, "step": 5194 }, { "epoch": 1.22, "learning_rate": 1.3443212500437604e-05, "loss": 0.0402, "step": 5195 }, { "epoch": 1.22, "learning_rate": 1.344083300120322e-05, "loss": 0.0413, "step": 5196 }, { "epoch": 1.22, "learning_rate": 1.3438453280964803e-05, "loss": 0.0107, "step": 5197 }, { "epoch": 1.22, "learning_rate": 1.34360733398752e-05, "loss": 0.0206, "step": 5198 }, { "epoch": 1.22, "learning_rate": 1.3433693178087275e-05, "loss": 0.0395, "step": 5199 }, { "epoch": 1.22, "learning_rate": 1.3431312795753902e-05, "loss": 0.0089, "step": 5200 }, { "epoch": 1.22, "learning_rate": 1.3428932193027975e-05, "loss": 0.0147, "step": 5201 }, { "epoch": 1.22, "learning_rate": 1.34265513700624e-05, "loss": 0.0417, "step": 5202 }, { "epoch": 1.22, "learning_rate": 1.3424170327010097e-05, "loss": 0.0237, "step": 5203 }, { "epoch": 1.22, "learning_rate": 1.3421789064024e-05, "loss": 0.0021, "step": 5204 }, { "epoch": 1.22, "learning_rate": 1.3419407581257056e-05, "loss": 0.0179, "step": 5205 }, { "epoch": 1.22, "learning_rate": 1.3417025878862227e-05, "loss": 0.0033, "step": 5206 }, { "epoch": 1.22, "learning_rate": 1.341464395699249e-05, "loss": 0.0028, "step": 5207 }, { "epoch": 1.22, "learning_rate": 1.3412261815800835e-05, "loss": 0.0321, "step": 5208 }, { "epoch": 1.22, "learning_rate": 1.3409879455440267e-05, "loss": 0.0037, "step": 5209 }, { "epoch": 1.22, "learning_rate": 1.3407496876063806e-05, "loss": 0.0669, "step": 5210 }, { "epoch": 1.22, "learning_rate": 1.3405114077824482e-05, "loss": 0.0538, "step": 5211 }, { "epoch": 1.22, "learning_rate": 1.3402731060875343e-05, "loss": 0.0802, "step": 5212 }, { "epoch": 1.22, "learning_rate": 1.3400347825369446e-05, "loss": 0.0921, "step": 5213 }, { "epoch": 1.22, "learning_rate": 1.3397964371459871e-05, "loss": 0.0292, "step": 5214 }, { "epoch": 1.22, "learning_rate": 1.3395580699299706e-05, "loss": 0.0671, "step": 5215 }, { "epoch": 1.22, "learning_rate": 1.339319680904205e-05, "loss": 0.077, "step": 5216 }, { "epoch": 1.22, "learning_rate": 1.3390812700840026e-05, "loss": 0.0749, "step": 5217 }, { "epoch": 1.22, "learning_rate": 1.3388428374846759e-05, "loss": 0.0343, "step": 5218 }, { "epoch": 1.22, "learning_rate": 1.3386043831215396e-05, "loss": 0.0547, "step": 5219 }, { "epoch": 1.23, "learning_rate": 1.3383659070099095e-05, "loss": 0.0424, "step": 5220 }, { "epoch": 1.23, "learning_rate": 1.338127409165103e-05, "loss": 0.0269, "step": 5221 }, { "epoch": 1.23, "learning_rate": 1.337888889602439e-05, "loss": 0.0586, "step": 5222 }, { "epoch": 1.23, "learning_rate": 1.337650348337237e-05, "loss": 0.0106, "step": 5223 }, { "epoch": 1.23, "learning_rate": 1.3374117853848187e-05, "loss": 0.0574, "step": 5224 }, { "epoch": 1.23, "learning_rate": 1.3371732007605074e-05, "loss": 0.0314, "step": 5225 }, { "epoch": 1.23, "learning_rate": 1.3369345944796266e-05, "loss": 0.0088, "step": 5226 }, { "epoch": 1.23, "learning_rate": 1.336695966557502e-05, "loss": 0.0273, "step": 5227 }, { "epoch": 1.23, "learning_rate": 1.3364573170094618e-05, "loss": 0.0226, "step": 5228 }, { "epoch": 1.23, "learning_rate": 1.3362186458508327e-05, "loss": 0.0506, "step": 5229 }, { "epoch": 1.23, "learning_rate": 1.3359799530969455e-05, "loss": 0.0616, "step": 5230 }, { "epoch": 1.23, "learning_rate": 1.3357412387631314e-05, "loss": 0.0124, "step": 5231 }, { "epoch": 1.23, "learning_rate": 1.3355025028647225e-05, "loss": 0.1321, "step": 5232 }, { "epoch": 1.23, "learning_rate": 1.3352637454170534e-05, "loss": 0.0422, "step": 5233 }, { "epoch": 1.23, "learning_rate": 1.3350249664354589e-05, "loss": 0.0228, "step": 5234 }, { "epoch": 1.23, "learning_rate": 1.3347861659352762e-05, "loss": 0.088, "step": 5235 }, { "epoch": 1.23, "learning_rate": 1.3345473439318426e-05, "loss": 0.0542, "step": 5236 }, { "epoch": 1.23, "learning_rate": 1.3343085004404983e-05, "loss": 0.0042, "step": 5237 }, { "epoch": 1.23, "learning_rate": 1.3340696354765843e-05, "loss": 0.0718, "step": 5238 }, { "epoch": 1.23, "learning_rate": 1.333830749055442e-05, "loss": 0.0632, "step": 5239 }, { "epoch": 1.23, "learning_rate": 1.3335918411924158e-05, "loss": 0.0083, "step": 5240 }, { "epoch": 1.23, "learning_rate": 1.3333529119028508e-05, "loss": 0.0468, "step": 5241 }, { "epoch": 1.23, "learning_rate": 1.3331139612020927e-05, "loss": 0.0015, "step": 5242 }, { "epoch": 1.23, "learning_rate": 1.3328749891054897e-05, "loss": 0.0169, "step": 5243 }, { "epoch": 1.23, "learning_rate": 1.3326359956283907e-05, "loss": 0.1139, "step": 5244 }, { "epoch": 1.23, "learning_rate": 1.3323969807861468e-05, "loss": 0.0207, "step": 5245 }, { "epoch": 1.23, "learning_rate": 1.332157944594109e-05, "loss": 0.0484, "step": 5246 }, { "epoch": 1.23, "learning_rate": 1.331918887067631e-05, "loss": 0.0469, "step": 5247 }, { "epoch": 1.23, "learning_rate": 1.3316798082220674e-05, "loss": 0.0203, "step": 5248 }, { "epoch": 1.23, "learning_rate": 1.3314407080727746e-05, "loss": 0.0049, "step": 5249 }, { "epoch": 1.23, "learning_rate": 1.3312015866351089e-05, "loss": 0.0514, "step": 5250 }, { "epoch": 1.23, "learning_rate": 1.3309624439244301e-05, "loss": 0.0089, "step": 5251 }, { "epoch": 1.23, "learning_rate": 1.3307232799560977e-05, "loss": 0.0283, "step": 5252 }, { "epoch": 1.23, "learning_rate": 1.3304840947454734e-05, "loss": 0.0259, "step": 5253 }, { "epoch": 1.23, "learning_rate": 1.3302448883079198e-05, "loss": 0.0285, "step": 5254 }, { "epoch": 1.23, "learning_rate": 1.330005660658801e-05, "loss": 0.0454, "step": 5255 }, { "epoch": 1.23, "learning_rate": 1.329766411813483e-05, "loss": 0.0277, "step": 5256 }, { "epoch": 1.23, "learning_rate": 1.3295271417873325e-05, "loss": 0.0325, "step": 5257 }, { "epoch": 1.23, "learning_rate": 1.3292878505957177e-05, "loss": 0.0953, "step": 5258 }, { "epoch": 1.23, "learning_rate": 1.3290485382540084e-05, "loss": 0.0597, "step": 5259 }, { "epoch": 1.23, "learning_rate": 1.3288092047775752e-05, "loss": 0.0781, "step": 5260 }, { "epoch": 1.23, "learning_rate": 1.3285698501817907e-05, "loss": 0.0553, "step": 5261 }, { "epoch": 1.23, "learning_rate": 1.3283304744820287e-05, "loss": 0.0206, "step": 5262 }, { "epoch": 1.24, "learning_rate": 1.3280910776936642e-05, "loss": 0.0068, "step": 5263 }, { "epoch": 1.24, "learning_rate": 1.3278516598320732e-05, "loss": 0.0283, "step": 5264 }, { "epoch": 1.24, "learning_rate": 1.327612220912634e-05, "loss": 0.0098, "step": 5265 }, { "epoch": 1.24, "learning_rate": 1.3273727609507253e-05, "loss": 0.01, "step": 5266 }, { "epoch": 1.24, "learning_rate": 1.3271332799617283e-05, "loss": 0.083, "step": 5267 }, { "epoch": 1.24, "learning_rate": 1.3268937779610238e-05, "loss": 0.0717, "step": 5268 }, { "epoch": 1.24, "learning_rate": 1.3266542549639958e-05, "loss": 0.0094, "step": 5269 }, { "epoch": 1.24, "learning_rate": 1.3264147109860284e-05, "loss": 0.0423, "step": 5270 }, { "epoch": 1.24, "learning_rate": 1.3261751460425077e-05, "loss": 0.0429, "step": 5271 }, { "epoch": 1.24, "learning_rate": 1.3259355601488203e-05, "loss": 0.0184, "step": 5272 }, { "epoch": 1.24, "learning_rate": 1.3256959533203556e-05, "loss": 0.0358, "step": 5273 }, { "epoch": 1.24, "learning_rate": 1.3254563255725028e-05, "loss": 0.015, "step": 5274 }, { "epoch": 1.24, "learning_rate": 1.3252166769206539e-05, "loss": 0.0374, "step": 5275 }, { "epoch": 1.24, "learning_rate": 1.3249770073802007e-05, "loss": 0.0399, "step": 5276 }, { "epoch": 1.24, "learning_rate": 1.3247373169665379e-05, "loss": 0.0055, "step": 5277 }, { "epoch": 1.24, "learning_rate": 1.3244976056950598e-05, "loss": 0.006, "step": 5278 }, { "epoch": 1.24, "learning_rate": 1.324257873581164e-05, "loss": 0.0491, "step": 5279 }, { "epoch": 1.24, "learning_rate": 1.3240181206402477e-05, "loss": 0.0278, "step": 5280 }, { "epoch": 1.24, "learning_rate": 1.3237783468877106e-05, "loss": 0.0953, "step": 5281 }, { "epoch": 1.24, "learning_rate": 1.323538552338953e-05, "loss": 0.0368, "step": 5282 }, { "epoch": 1.24, "learning_rate": 1.3232987370093773e-05, "loss": 0.0675, "step": 5283 }, { "epoch": 1.24, "learning_rate": 1.3230589009143865e-05, "loss": 0.078, "step": 5284 }, { "epoch": 1.24, "learning_rate": 1.3228190440693854e-05, "loss": 0.0144, "step": 5285 }, { "epoch": 1.24, "learning_rate": 1.3225791664897796e-05, "loss": 0.0761, "step": 5286 }, { "epoch": 1.24, "learning_rate": 1.3223392681909769e-05, "loss": 0.0339, "step": 5287 }, { "epoch": 1.24, "learning_rate": 1.3220993491883854e-05, "loss": 0.0326, "step": 5288 }, { "epoch": 1.24, "learning_rate": 1.3218594094974155e-05, "loss": 0.0075, "step": 5289 }, { "epoch": 1.24, "learning_rate": 1.3216194491334779e-05, "loss": 0.0346, "step": 5290 }, { "epoch": 1.24, "learning_rate": 1.321379468111986e-05, "loss": 0.0129, "step": 5291 }, { "epoch": 1.24, "learning_rate": 1.321139466448353e-05, "loss": 0.0611, "step": 5292 }, { "epoch": 1.24, "learning_rate": 1.3208994441579948e-05, "loss": 0.0417, "step": 5293 }, { "epoch": 1.24, "learning_rate": 1.3206594012563274e-05, "loss": 0.0171, "step": 5294 }, { "epoch": 1.24, "learning_rate": 1.3204193377587693e-05, "loss": 0.0146, "step": 5295 }, { "epoch": 1.24, "learning_rate": 1.3201792536807392e-05, "loss": 0.039, "step": 5296 }, { "epoch": 1.24, "learning_rate": 1.3199391490376579e-05, "loss": 0.0681, "step": 5297 }, { "epoch": 1.24, "learning_rate": 1.3196990238449472e-05, "loss": 0.0375, "step": 5298 }, { "epoch": 1.24, "learning_rate": 1.3194588781180303e-05, "loss": 0.0348, "step": 5299 }, { "epoch": 1.24, "learning_rate": 1.3192187118723316e-05, "loss": 0.0376, "step": 5300 }, { "epoch": 1.24, "learning_rate": 1.3189785251232773e-05, "loss": 0.003, "step": 5301 }, { "epoch": 1.24, "learning_rate": 1.3187383178862942e-05, "loss": 0.0693, "step": 5302 }, { "epoch": 1.24, "learning_rate": 1.3184980901768112e-05, "loss": 0.0488, "step": 5303 }, { "epoch": 1.24, "learning_rate": 1.3182578420102575e-05, "loss": 0.0683, "step": 5304 }, { "epoch": 1.25, "learning_rate": 1.3180175734020645e-05, "loss": 0.0236, "step": 5305 }, { "epoch": 1.25, "learning_rate": 1.3177772843676646e-05, "loss": 0.0376, "step": 5306 }, { "epoch": 1.25, "learning_rate": 1.3175369749224916e-05, "loss": 0.0151, "step": 5307 }, { "epoch": 1.25, "learning_rate": 1.3172966450819799e-05, "loss": 0.0234, "step": 5308 }, { "epoch": 1.25, "learning_rate": 1.3170562948615669e-05, "loss": 0.052, "step": 5309 }, { "epoch": 1.25, "learning_rate": 1.3168159242766893e-05, "loss": 0.0485, "step": 5310 }, { "epoch": 1.25, "learning_rate": 1.316575533342787e-05, "loss": 0.0516, "step": 5311 }, { "epoch": 1.25, "learning_rate": 1.3163351220752994e-05, "loss": 0.0364, "step": 5312 }, { "epoch": 1.25, "learning_rate": 1.3160946904896684e-05, "loss": 0.025, "step": 5313 }, { "epoch": 1.25, "learning_rate": 1.3158542386013366e-05, "loss": 0.0754, "step": 5314 }, { "epoch": 1.25, "learning_rate": 1.3156137664257488e-05, "loss": 0.0106, "step": 5315 }, { "epoch": 1.25, "learning_rate": 1.3153732739783497e-05, "loss": 0.0163, "step": 5316 }, { "epoch": 1.25, "learning_rate": 1.315132761274587e-05, "loss": 0.0565, "step": 5317 }, { "epoch": 1.25, "learning_rate": 1.3148922283299079e-05, "loss": 0.0069, "step": 5318 }, { "epoch": 1.25, "learning_rate": 1.3146516751597627e-05, "loss": 0.0261, "step": 5319 }, { "epoch": 1.25, "learning_rate": 1.314411101779601e-05, "loss": 0.0201, "step": 5320 }, { "epoch": 1.25, "learning_rate": 1.3141705082048756e-05, "loss": 0.0131, "step": 5321 }, { "epoch": 1.25, "learning_rate": 1.3139298944510394e-05, "loss": 0.0426, "step": 5322 }, { "epoch": 1.25, "learning_rate": 1.3136892605335475e-05, "loss": 0.0341, "step": 5323 }, { "epoch": 1.25, "learning_rate": 1.313448606467855e-05, "loss": 0.0647, "step": 5324 }, { "epoch": 1.25, "learning_rate": 1.3132079322694193e-05, "loss": 0.0736, "step": 5325 }, { "epoch": 1.25, "learning_rate": 1.312967237953699e-05, "loss": 0.0889, "step": 5326 }, { "epoch": 1.25, "learning_rate": 1.3127265235361541e-05, "loss": 0.0649, "step": 5327 }, { "epoch": 1.25, "learning_rate": 1.3124857890322451e-05, "loss": 0.0499, "step": 5328 }, { "epoch": 1.25, "learning_rate": 1.312245034457435e-05, "loss": 0.0308, "step": 5329 }, { "epoch": 1.25, "learning_rate": 1.3120042598271866e-05, "loss": 0.1112, "step": 5330 }, { "epoch": 1.25, "learning_rate": 1.3117634651569655e-05, "loss": 0.0413, "step": 5331 }, { "epoch": 1.25, "learning_rate": 1.3115226504622374e-05, "loss": 0.0323, "step": 5332 }, { "epoch": 1.25, "learning_rate": 1.31128181575847e-05, "loss": 0.0384, "step": 5333 }, { "epoch": 1.25, "learning_rate": 1.311040961061132e-05, "loss": 0.0716, "step": 5334 }, { "epoch": 1.25, "learning_rate": 1.310800086385694e-05, "loss": 0.0151, "step": 5335 }, { "epoch": 1.25, "learning_rate": 1.3105591917476263e-05, "loss": 0.0663, "step": 5336 }, { "epoch": 1.25, "learning_rate": 1.3103182771624026e-05, "loss": 0.1083, "step": 5337 }, { "epoch": 1.25, "learning_rate": 1.3100773426454957e-05, "loss": 0.0241, "step": 5338 }, { "epoch": 1.25, "learning_rate": 1.3098363882123818e-05, "loss": 0.082, "step": 5339 }, { "epoch": 1.25, "learning_rate": 1.3095954138785368e-05, "loss": 0.0408, "step": 5340 }, { "epoch": 1.25, "learning_rate": 1.3093544196594385e-05, "loss": 0.0671, "step": 5341 }, { "epoch": 1.25, "learning_rate": 1.3091134055705656e-05, "loss": 0.0455, "step": 5342 }, { "epoch": 1.25, "learning_rate": 1.3088723716273993e-05, "loss": 0.0253, "step": 5343 }, { "epoch": 1.25, "learning_rate": 1.3086313178454203e-05, "loss": 0.084, "step": 5344 }, { "epoch": 1.25, "learning_rate": 1.308390244240112e-05, "loss": 0.0174, "step": 5345 }, { "epoch": 1.25, "learning_rate": 1.308149150826958e-05, "loss": 0.0209, "step": 5346 }, { "epoch": 1.25, "learning_rate": 1.3079080376214439e-05, "loss": 0.0444, "step": 5347 }, { "epoch": 1.26, "learning_rate": 1.3076669046390565e-05, "loss": 0.0203, "step": 5348 }, { "epoch": 1.26, "learning_rate": 1.3074257518952835e-05, "loss": 0.0138, "step": 5349 }, { "epoch": 1.26, "learning_rate": 1.3071845794056141e-05, "loss": 0.0358, "step": 5350 }, { "epoch": 1.26, "learning_rate": 1.3069433871855391e-05, "loss": 0.0174, "step": 5351 }, { "epoch": 1.26, "learning_rate": 1.3067021752505498e-05, "loss": 0.0455, "step": 5352 }, { "epoch": 1.26, "learning_rate": 1.3064609436161398e-05, "loss": 0.0333, "step": 5353 }, { "epoch": 1.26, "learning_rate": 1.3062196922978025e-05, "loss": 0.0867, "step": 5354 }, { "epoch": 1.26, "learning_rate": 1.3059784213110343e-05, "loss": 0.0046, "step": 5355 }, { "epoch": 1.26, "learning_rate": 1.305737130671331e-05, "loss": 0.0336, "step": 5356 }, { "epoch": 1.26, "learning_rate": 1.3054958203941916e-05, "loss": 0.0354, "step": 5357 }, { "epoch": 1.26, "learning_rate": 1.305254490495115e-05, "loss": 0.0187, "step": 5358 }, { "epoch": 1.26, "learning_rate": 1.3050131409896017e-05, "loss": 0.0477, "step": 5359 }, { "epoch": 1.26, "learning_rate": 1.3047717718931537e-05, "loss": 0.0587, "step": 5360 }, { "epoch": 1.26, "learning_rate": 1.3045303832212739e-05, "loss": 0.0751, "step": 5361 }, { "epoch": 1.26, "learning_rate": 1.304288974989467e-05, "loss": 0.1936, "step": 5362 }, { "epoch": 1.26, "learning_rate": 1.3040475472132382e-05, "loss": 0.0659, "step": 5363 }, { "epoch": 1.26, "learning_rate": 1.3038060999080945e-05, "loss": 0.128, "step": 5364 }, { "epoch": 1.26, "learning_rate": 1.3035646330895442e-05, "loss": 0.0101, "step": 5365 }, { "epoch": 1.26, "learning_rate": 1.3033231467730964e-05, "loss": 0.0869, "step": 5366 }, { "epoch": 1.26, "learning_rate": 1.303081640974262e-05, "loss": 0.046, "step": 5367 }, { "epoch": 1.26, "learning_rate": 1.3028401157085524e-05, "loss": 0.0082, "step": 5368 }, { "epoch": 1.26, "learning_rate": 1.3025985709914813e-05, "loss": 0.0032, "step": 5369 }, { "epoch": 1.26, "learning_rate": 1.3023570068385625e-05, "loss": 0.0309, "step": 5370 }, { "epoch": 1.26, "learning_rate": 1.3021154232653124e-05, "loss": 0.0434, "step": 5371 }, { "epoch": 1.26, "learning_rate": 1.301873820287247e-05, "loss": 0.0215, "step": 5372 }, { "epoch": 1.26, "learning_rate": 1.3016321979198849e-05, "loss": 0.0717, "step": 5373 }, { "epoch": 1.26, "learning_rate": 1.3013905561787453e-05, "loss": 0.0168, "step": 5374 }, { "epoch": 1.26, "learning_rate": 1.301148895079349e-05, "loss": 0.0336, "step": 5375 }, { "epoch": 1.26, "learning_rate": 1.3009072146372173e-05, "loss": 0.0417, "step": 5376 }, { "epoch": 1.26, "learning_rate": 1.300665514867874e-05, "loss": 0.0663, "step": 5377 }, { "epoch": 1.26, "learning_rate": 1.3004237957868431e-05, "loss": 0.0225, "step": 5378 }, { "epoch": 1.26, "learning_rate": 1.3001820574096502e-05, "loss": 0.0227, "step": 5379 }, { "epoch": 1.26, "learning_rate": 1.299940299751822e-05, "loss": 0.0909, "step": 5380 }, { "epoch": 1.26, "learning_rate": 1.2996985228288867e-05, "loss": 0.0155, "step": 5381 }, { "epoch": 1.26, "learning_rate": 1.2994567266563736e-05, "loss": 0.0078, "step": 5382 }, { "epoch": 1.26, "learning_rate": 1.2992149112498132e-05, "loss": 0.0105, "step": 5383 }, { "epoch": 1.26, "learning_rate": 1.2989730766247369e-05, "loss": 0.0219, "step": 5384 }, { "epoch": 1.26, "learning_rate": 1.298731222796678e-05, "loss": 0.0124, "step": 5385 }, { "epoch": 1.26, "learning_rate": 1.2984893497811708e-05, "loss": 0.0463, "step": 5386 }, { "epoch": 1.26, "learning_rate": 1.298247457593751e-05, "loss": 0.0131, "step": 5387 }, { "epoch": 1.26, "learning_rate": 1.2980055462499545e-05, "loss": 0.026, "step": 5388 }, { "epoch": 1.26, "learning_rate": 1.29776361576532e-05, "loss": 0.0063, "step": 5389 }, { "epoch": 1.26, "learning_rate": 1.2975216661553861e-05, "loss": 0.0069, "step": 5390 }, { "epoch": 1.27, "learning_rate": 1.2972796974356935e-05, "loss": 0.007, "step": 5391 }, { "epoch": 1.27, "learning_rate": 1.2970377096217836e-05, "loss": 0.0091, "step": 5392 }, { "epoch": 1.27, "learning_rate": 1.2967957027291997e-05, "loss": 0.0743, "step": 5393 }, { "epoch": 1.27, "learning_rate": 1.296553676773485e-05, "loss": 0.0402, "step": 5394 }, { "epoch": 1.27, "learning_rate": 1.2963116317701859e-05, "loss": 0.0981, "step": 5395 }, { "epoch": 1.27, "learning_rate": 1.2960695677348476e-05, "loss": 0.0289, "step": 5396 }, { "epoch": 1.27, "learning_rate": 1.295827484683019e-05, "loss": 0.1342, "step": 5397 }, { "epoch": 1.27, "learning_rate": 1.2955853826302482e-05, "loss": 0.1299, "step": 5398 }, { "epoch": 1.27, "learning_rate": 1.295343261592086e-05, "loss": 0.0091, "step": 5399 }, { "epoch": 1.27, "learning_rate": 1.2951011215840834e-05, "loss": 0.0143, "step": 5400 }, { "epoch": 1.27, "learning_rate": 1.294858962621793e-05, "loss": 0.0768, "step": 5401 }, { "epoch": 1.27, "learning_rate": 1.2946167847207686e-05, "loss": 0.0855, "step": 5402 }, { "epoch": 1.27, "learning_rate": 1.2943745878965658e-05, "loss": 0.1032, "step": 5403 }, { "epoch": 1.27, "learning_rate": 1.2941323721647403e-05, "loss": 0.0353, "step": 5404 }, { "epoch": 1.27, "learning_rate": 1.2938901375408497e-05, "loss": 0.0253, "step": 5405 }, { "epoch": 1.27, "learning_rate": 1.2936478840404525e-05, "loss": 0.0398, "step": 5406 }, { "epoch": 1.27, "learning_rate": 1.2934056116791092e-05, "loss": 0.0318, "step": 5407 }, { "epoch": 1.27, "learning_rate": 1.2931633204723801e-05, "loss": 0.0107, "step": 5408 }, { "epoch": 1.27, "learning_rate": 1.2929210104358281e-05, "loss": 0.0286, "step": 5409 }, { "epoch": 1.27, "learning_rate": 1.2926786815850162e-05, "loss": 0.0507, "step": 5410 }, { "epoch": 1.27, "learning_rate": 1.2924363339355097e-05, "loss": 0.0299, "step": 5411 }, { "epoch": 1.27, "learning_rate": 1.292193967502874e-05, "loss": 0.0278, "step": 5412 }, { "epoch": 1.27, "learning_rate": 1.2919515823026771e-05, "loss": 0.0712, "step": 5413 }, { "epoch": 1.27, "learning_rate": 1.2917091783504865e-05, "loss": 0.0304, "step": 5414 }, { "epoch": 1.27, "learning_rate": 1.2914667556618723e-05, "loss": 0.0436, "step": 5415 }, { "epoch": 1.27, "learning_rate": 1.2912243142524048e-05, "loss": 0.0779, "step": 5416 }, { "epoch": 1.27, "learning_rate": 1.2909818541376562e-05, "loss": 0.0469, "step": 5417 }, { "epoch": 1.27, "learning_rate": 1.2907393753331996e-05, "loss": 0.0358, "step": 5418 }, { "epoch": 1.27, "learning_rate": 1.2904968778546097e-05, "loss": 0.0166, "step": 5419 }, { "epoch": 1.27, "learning_rate": 1.2902543617174614e-05, "loss": 0.1033, "step": 5420 }, { "epoch": 1.27, "learning_rate": 1.2900118269373323e-05, "loss": 0.0596, "step": 5421 }, { "epoch": 1.27, "learning_rate": 1.2897692735297995e-05, "loss": 0.0088, "step": 5422 }, { "epoch": 1.27, "learning_rate": 1.2895267015104428e-05, "loss": 0.0424, "step": 5423 }, { "epoch": 1.27, "learning_rate": 1.2892841108948422e-05, "loss": 0.0256, "step": 5424 }, { "epoch": 1.27, "learning_rate": 1.2890415016985798e-05, "loss": 0.0534, "step": 5425 }, { "epoch": 1.27, "learning_rate": 1.2887988739372374e-05, "loss": 0.022, "step": 5426 }, { "epoch": 1.27, "learning_rate": 1.2885562276263999e-05, "loss": 0.019, "step": 5427 }, { "epoch": 1.27, "learning_rate": 1.2883135627816514e-05, "loss": 0.019, "step": 5428 }, { "epoch": 1.27, "learning_rate": 1.2880708794185793e-05, "loss": 0.0504, "step": 5429 }, { "epoch": 1.27, "learning_rate": 1.2878281775527701e-05, "loss": 0.0528, "step": 5430 }, { "epoch": 1.27, "learning_rate": 1.2875854571998136e-05, "loss": 0.0546, "step": 5431 }, { "epoch": 1.27, "learning_rate": 1.2873427183752986e-05, "loss": 0.0708, "step": 5432 }, { "epoch": 1.28, "learning_rate": 1.2870999610948171e-05, "loss": 0.0477, "step": 5433 }, { "epoch": 1.28, "learning_rate": 1.2868571853739606e-05, "loss": 0.0356, "step": 5434 }, { "epoch": 1.28, "learning_rate": 1.2866143912283229e-05, "loss": 0.0738, "step": 5435 }, { "epoch": 1.28, "learning_rate": 1.2863715786734982e-05, "loss": 0.0737, "step": 5436 }, { "epoch": 1.28, "learning_rate": 1.2861287477250832e-05, "loss": 0.0303, "step": 5437 }, { "epoch": 1.28, "learning_rate": 1.285885898398674e-05, "loss": 0.1061, "step": 5438 }, { "epoch": 1.28, "learning_rate": 1.2856430307098693e-05, "loss": 0.0285, "step": 5439 }, { "epoch": 1.28, "learning_rate": 1.2854001446742681e-05, "loss": 0.034, "step": 5440 }, { "epoch": 1.28, "learning_rate": 1.2851572403074713e-05, "loss": 0.0366, "step": 5441 }, { "epoch": 1.28, "learning_rate": 1.2849143176250802e-05, "loss": 0.0579, "step": 5442 }, { "epoch": 1.28, "learning_rate": 1.2846713766426979e-05, "loss": 0.0537, "step": 5443 }, { "epoch": 1.28, "learning_rate": 1.2844284173759285e-05, "loss": 0.0276, "step": 5444 }, { "epoch": 1.28, "learning_rate": 1.2841854398403769e-05, "loss": 0.0636, "step": 5445 }, { "epoch": 1.28, "learning_rate": 1.2839424440516496e-05, "loss": 0.0268, "step": 5446 }, { "epoch": 1.28, "learning_rate": 1.2836994300253548e-05, "loss": 0.0984, "step": 5447 }, { "epoch": 1.28, "learning_rate": 1.2834563977771003e-05, "loss": 0.0228, "step": 5448 }, { "epoch": 1.28, "learning_rate": 1.2832133473224969e-05, "loss": 0.0644, "step": 5449 }, { "epoch": 1.28, "learning_rate": 1.282970278677155e-05, "loss": 0.0486, "step": 5450 }, { "epoch": 1.28, "learning_rate": 1.2827271918566871e-05, "loss": 0.0557, "step": 5451 }, { "epoch": 1.28, "learning_rate": 1.2824840868767066e-05, "loss": 0.0394, "step": 5452 }, { "epoch": 1.28, "learning_rate": 1.2822409637528281e-05, "loss": 0.0091, "step": 5453 }, { "epoch": 1.28, "learning_rate": 1.2819978225006671e-05, "loss": 0.0405, "step": 5454 }, { "epoch": 1.28, "learning_rate": 1.2817546631358411e-05, "loss": 0.0284, "step": 5455 }, { "epoch": 1.28, "learning_rate": 1.2815114856739676e-05, "loss": 0.0516, "step": 5456 }, { "epoch": 1.28, "learning_rate": 1.2812682901306666e-05, "loss": 0.0439, "step": 5457 }, { "epoch": 1.28, "learning_rate": 1.2810250765215575e-05, "loss": 0.0692, "step": 5458 }, { "epoch": 1.28, "learning_rate": 1.2807818448622627e-05, "loss": 0.0189, "step": 5459 }, { "epoch": 1.28, "learning_rate": 1.2805385951684042e-05, "loss": 0.0204, "step": 5460 }, { "epoch": 1.28, "learning_rate": 1.2802953274556064e-05, "loss": 0.0362, "step": 5461 }, { "epoch": 1.28, "learning_rate": 1.280052041739494e-05, "loss": 0.0738, "step": 5462 }, { "epoch": 1.28, "learning_rate": 1.2798087380356936e-05, "loss": 0.0117, "step": 5463 }, { "epoch": 1.28, "learning_rate": 1.2795654163598323e-05, "loss": 0.0238, "step": 5464 }, { "epoch": 1.28, "learning_rate": 1.2793220767275388e-05, "loss": 0.0731, "step": 5465 }, { "epoch": 1.28, "learning_rate": 1.2790787191544425e-05, "loss": 0.0429, "step": 5466 }, { "epoch": 1.28, "learning_rate": 1.2788353436561742e-05, "loss": 0.0357, "step": 5467 }, { "epoch": 1.28, "learning_rate": 1.278591950248366e-05, "loss": 0.0237, "step": 5468 }, { "epoch": 1.28, "learning_rate": 1.2783485389466513e-05, "loss": 0.0143, "step": 5469 }, { "epoch": 1.28, "learning_rate": 1.2781051097666634e-05, "loss": 0.0248, "step": 5470 }, { "epoch": 1.28, "learning_rate": 1.2778616627240388e-05, "loss": 0.0649, "step": 5471 }, { "epoch": 1.28, "learning_rate": 1.2776181978344133e-05, "loss": 0.0057, "step": 5472 }, { "epoch": 1.28, "learning_rate": 1.2773747151134255e-05, "loss": 0.0723, "step": 5473 }, { "epoch": 1.28, "learning_rate": 1.2771312145767132e-05, "loss": 0.0369, "step": 5474 }, { "epoch": 1.28, "learning_rate": 1.2768876962399168e-05, "loss": 0.0429, "step": 5475 }, { "epoch": 1.29, "learning_rate": 1.2766441601186777e-05, "loss": 0.029, "step": 5476 }, { "epoch": 1.29, "learning_rate": 1.2764006062286378e-05, "loss": 0.0021, "step": 5477 }, { "epoch": 1.29, "learning_rate": 1.2761570345854407e-05, "loss": 0.0036, "step": 5478 }, { "epoch": 1.29, "learning_rate": 1.275913445204731e-05, "loss": 0.0072, "step": 5479 }, { "epoch": 1.29, "learning_rate": 1.2756698381021541e-05, "loss": 0.0084, "step": 5480 }, { "epoch": 1.29, "learning_rate": 1.2754262132933576e-05, "loss": 0.0095, "step": 5481 }, { "epoch": 1.29, "learning_rate": 1.2751825707939884e-05, "loss": 0.016, "step": 5482 }, { "epoch": 1.29, "learning_rate": 1.2749389106196968e-05, "loss": 0.0301, "step": 5483 }, { "epoch": 1.29, "learning_rate": 1.2746952327861318e-05, "loss": 0.0378, "step": 5484 }, { "epoch": 1.29, "learning_rate": 1.2744515373089457e-05, "loss": 0.0006, "step": 5485 }, { "epoch": 1.29, "learning_rate": 1.2742078242037906e-05, "loss": 0.0774, "step": 5486 }, { "epoch": 1.29, "learning_rate": 1.2739640934863202e-05, "loss": 0.0327, "step": 5487 }, { "epoch": 1.29, "learning_rate": 1.2737203451721894e-05, "loss": 0.0035, "step": 5488 }, { "epoch": 1.29, "learning_rate": 1.2734765792770537e-05, "loss": 0.0169, "step": 5489 }, { "epoch": 1.29, "learning_rate": 1.2732327958165707e-05, "loss": 0.0336, "step": 5490 }, { "epoch": 1.29, "learning_rate": 1.2729889948063984e-05, "loss": 0.0763, "step": 5491 }, { "epoch": 1.29, "learning_rate": 1.2727451762621961e-05, "loss": 0.0331, "step": 5492 }, { "epoch": 1.29, "learning_rate": 1.272501340199624e-05, "loss": 0.0203, "step": 5493 }, { "epoch": 1.29, "learning_rate": 1.2722574866343438e-05, "loss": 0.0434, "step": 5494 }, { "epoch": 1.29, "learning_rate": 1.2720136155820182e-05, "loss": 0.0118, "step": 5495 }, { "epoch": 1.29, "learning_rate": 1.271769727058311e-05, "loss": 0.0646, "step": 5496 }, { "epoch": 1.29, "learning_rate": 1.2715258210788867e-05, "loss": 0.0737, "step": 5497 }, { "epoch": 1.29, "learning_rate": 1.271281897659412e-05, "loss": 0.0208, "step": 5498 }, { "epoch": 1.29, "learning_rate": 1.2710379568155535e-05, "loss": 0.0206, "step": 5499 }, { "epoch": 1.29, "learning_rate": 1.2707939985629798e-05, "loss": 0.0096, "step": 5500 }, { "epoch": 1.29, "learning_rate": 1.2705500229173604e-05, "loss": 0.0511, "step": 5501 }, { "epoch": 1.29, "learning_rate": 1.2703060298943654e-05, "loss": 0.0208, "step": 5502 }, { "epoch": 1.29, "learning_rate": 1.2700620195096668e-05, "loss": 0.0218, "step": 5503 }, { "epoch": 1.29, "learning_rate": 1.2698179917789371e-05, "loss": 0.0168, "step": 5504 }, { "epoch": 1.29, "learning_rate": 1.26957394671785e-05, "loss": 0.0507, "step": 5505 }, { "epoch": 1.29, "learning_rate": 1.269329884342081e-05, "loss": 0.0774, "step": 5506 }, { "epoch": 1.29, "learning_rate": 1.2690858046673058e-05, "loss": 0.0491, "step": 5507 }, { "epoch": 1.29, "learning_rate": 1.2688417077092014e-05, "loss": 0.0378, "step": 5508 }, { "epoch": 1.29, "learning_rate": 1.2685975934834467e-05, "loss": 0.0496, "step": 5509 }, { "epoch": 1.29, "learning_rate": 1.2683534620057207e-05, "loss": 0.0148, "step": 5510 }, { "epoch": 1.29, "learning_rate": 1.2681093132917039e-05, "loss": 0.1041, "step": 5511 }, { "epoch": 1.29, "learning_rate": 1.267865147357078e-05, "loss": 0.0086, "step": 5512 }, { "epoch": 1.29, "learning_rate": 1.267620964217526e-05, "loss": 0.0137, "step": 5513 }, { "epoch": 1.29, "learning_rate": 1.2673767638887311e-05, "loss": 0.0168, "step": 5514 }, { "epoch": 1.29, "learning_rate": 1.2671325463863789e-05, "loss": 0.0341, "step": 5515 }, { "epoch": 1.29, "learning_rate": 1.266888311726155e-05, "loss": 0.0252, "step": 5516 }, { "epoch": 1.29, "learning_rate": 1.2666440599237469e-05, "loss": 0.0107, "step": 5517 }, { "epoch": 1.3, "learning_rate": 1.2663997909948423e-05, "loss": 0.0965, "step": 5518 }, { "epoch": 1.3, "learning_rate": 1.2661555049551313e-05, "loss": 0.0185, "step": 5519 }, { "epoch": 1.3, "learning_rate": 1.2659112018203036e-05, "loss": 0.0183, "step": 5520 }, { "epoch": 1.3, "learning_rate": 1.2656668816060513e-05, "loss": 0.0512, "step": 5521 }, { "epoch": 1.3, "learning_rate": 1.2654225443280668e-05, "loss": 0.0011, "step": 5522 }, { "epoch": 1.3, "learning_rate": 1.2651781900020436e-05, "loss": 0.0719, "step": 5523 }, { "epoch": 1.3, "learning_rate": 1.2649338186436773e-05, "loss": 0.0386, "step": 5524 }, { "epoch": 1.3, "learning_rate": 1.264689430268663e-05, "loss": 0.0165, "step": 5525 }, { "epoch": 1.3, "learning_rate": 1.264445024892698e-05, "loss": 0.0089, "step": 5526 }, { "epoch": 1.3, "learning_rate": 1.2642006025314806e-05, "loss": 0.1037, "step": 5527 }, { "epoch": 1.3, "learning_rate": 1.2639561632007098e-05, "loss": 0.039, "step": 5528 }, { "epoch": 1.3, "learning_rate": 1.263711706916086e-05, "loss": 0.0242, "step": 5529 }, { "epoch": 1.3, "learning_rate": 1.2634672336933102e-05, "loss": 0.0473, "step": 5530 }, { "epoch": 1.3, "learning_rate": 1.2632227435480853e-05, "loss": 0.0081, "step": 5531 }, { "epoch": 1.3, "learning_rate": 1.2629782364961152e-05, "loss": 0.0198, "step": 5532 }, { "epoch": 1.3, "learning_rate": 1.2627337125531036e-05, "loss": 0.0197, "step": 5533 }, { "epoch": 1.3, "learning_rate": 1.2624891717347567e-05, "loss": 0.0384, "step": 5534 }, { "epoch": 1.3, "learning_rate": 1.2622446140567815e-05, "loss": 0.0159, "step": 5535 }, { "epoch": 1.3, "learning_rate": 1.2620000395348855e-05, "loss": 0.0301, "step": 5536 }, { "epoch": 1.3, "learning_rate": 1.2617554481847782e-05, "loss": 0.0555, "step": 5537 }, { "epoch": 1.3, "learning_rate": 1.2615108400221691e-05, "loss": 0.0123, "step": 5538 }, { "epoch": 1.3, "learning_rate": 1.2612662150627694e-05, "loss": 0.0332, "step": 5539 }, { "epoch": 1.3, "learning_rate": 1.261021573322292e-05, "loss": 0.004, "step": 5540 }, { "epoch": 1.3, "learning_rate": 1.2607769148164492e-05, "loss": 0.0161, "step": 5541 }, { "epoch": 1.3, "learning_rate": 1.2605322395609563e-05, "loss": 0.0381, "step": 5542 }, { "epoch": 1.3, "learning_rate": 1.260287547571528e-05, "loss": 0.0071, "step": 5543 }, { "epoch": 1.3, "learning_rate": 1.260042838863881e-05, "loss": 0.0268, "step": 5544 }, { "epoch": 1.3, "learning_rate": 1.2597981134537331e-05, "loss": 0.0349, "step": 5545 }, { "epoch": 1.3, "learning_rate": 1.259553371356803e-05, "loss": 0.0037, "step": 5546 }, { "epoch": 1.3, "learning_rate": 1.2593086125888101e-05, "loss": 0.0015, "step": 5547 }, { "epoch": 1.3, "learning_rate": 1.2590638371654755e-05, "loss": 0.0052, "step": 5548 }, { "epoch": 1.3, "learning_rate": 1.2588190451025209e-05, "loss": 0.0087, "step": 5549 }, { "epoch": 1.3, "learning_rate": 1.2585742364156695e-05, "loss": 0.0354, "step": 5550 }, { "epoch": 1.3, "learning_rate": 1.258329411120645e-05, "loss": 0.0366, "step": 5551 }, { "epoch": 1.3, "learning_rate": 1.2580845692331729e-05, "loss": 0.0902, "step": 5552 }, { "epoch": 1.3, "learning_rate": 1.2578397107689787e-05, "loss": 0.0792, "step": 5553 }, { "epoch": 1.3, "learning_rate": 1.25759483574379e-05, "loss": 0.0094, "step": 5554 }, { "epoch": 1.3, "learning_rate": 1.2573499441733355e-05, "loss": 0.0301, "step": 5555 }, { "epoch": 1.3, "learning_rate": 1.2571050360733437e-05, "loss": 0.0105, "step": 5556 }, { "epoch": 1.3, "learning_rate": 1.2568601114595453e-05, "loss": 0.0681, "step": 5557 }, { "epoch": 1.3, "learning_rate": 1.2566151703476722e-05, "loss": 0.0026, "step": 5558 }, { "epoch": 1.3, "learning_rate": 1.2563702127534563e-05, "loss": 0.0025, "step": 5559 }, { "epoch": 1.3, "learning_rate": 1.2561252386926319e-05, "loss": 0.0471, "step": 5560 }, { "epoch": 1.31, "learning_rate": 1.2558802481809326e-05, "loss": 0.0772, "step": 5561 }, { "epoch": 1.31, "learning_rate": 1.255635241234095e-05, "loss": 0.054, "step": 5562 }, { "epoch": 1.31, "learning_rate": 1.2553902178678557e-05, "loss": 0.0214, "step": 5563 }, { "epoch": 1.31, "learning_rate": 1.2551451780979522e-05, "loss": 0.035, "step": 5564 }, { "epoch": 1.31, "learning_rate": 1.2549001219401233e-05, "loss": 0.0211, "step": 5565 }, { "epoch": 1.31, "learning_rate": 1.2546550494101096e-05, "loss": 0.0298, "step": 5566 }, { "epoch": 1.31, "learning_rate": 1.2544099605236511e-05, "loss": 0.0352, "step": 5567 }, { "epoch": 1.31, "learning_rate": 1.2541648552964904e-05, "loss": 0.1084, "step": 5568 }, { "epoch": 1.31, "learning_rate": 1.2539197337443705e-05, "loss": 0.013, "step": 5569 }, { "epoch": 1.31, "learning_rate": 1.2536745958830357e-05, "loss": 0.059, "step": 5570 }, { "epoch": 1.31, "learning_rate": 1.2534294417282304e-05, "loss": 0.0505, "step": 5571 }, { "epoch": 1.31, "learning_rate": 1.2531842712957014e-05, "loss": 0.0091, "step": 5572 }, { "epoch": 1.31, "learning_rate": 1.2529390846011963e-05, "loss": 0.0054, "step": 5573 }, { "epoch": 1.31, "learning_rate": 1.2526938816604623e-05, "loss": 0.0173, "step": 5574 }, { "epoch": 1.31, "learning_rate": 1.2524486624892497e-05, "loss": 0.0571, "step": 5575 }, { "epoch": 1.31, "learning_rate": 1.2522034271033089e-05, "loss": 0.013, "step": 5576 }, { "epoch": 1.31, "learning_rate": 1.2519581755183906e-05, "loss": 0.0204, "step": 5577 }, { "epoch": 1.31, "learning_rate": 1.251712907750248e-05, "loss": 0.0468, "step": 5578 }, { "epoch": 1.31, "learning_rate": 1.2514676238146337e-05, "loss": 0.0365, "step": 5579 }, { "epoch": 1.31, "learning_rate": 1.251222323727303e-05, "loss": 0.019, "step": 5580 }, { "epoch": 1.31, "learning_rate": 1.2509770075040116e-05, "loss": 0.0214, "step": 5581 }, { "epoch": 1.31, "learning_rate": 1.2507316751605153e-05, "loss": 0.0663, "step": 5582 }, { "epoch": 1.31, "learning_rate": 1.2504863267125726e-05, "loss": 0.0227, "step": 5583 }, { "epoch": 1.31, "learning_rate": 1.250240962175942e-05, "loss": 0.0077, "step": 5584 }, { "epoch": 1.31, "learning_rate": 1.2499955815663825e-05, "loss": 0.0137, "step": 5585 }, { "epoch": 1.31, "learning_rate": 1.2497501848996559e-05, "loss": 0.0651, "step": 5586 }, { "epoch": 1.31, "learning_rate": 1.249504772191523e-05, "loss": 0.0297, "step": 5587 }, { "epoch": 1.31, "learning_rate": 1.2492593434577474e-05, "loss": 0.0211, "step": 5588 }, { "epoch": 1.31, "learning_rate": 1.2490138987140923e-05, "loss": 0.0601, "step": 5589 }, { "epoch": 1.31, "learning_rate": 1.248768437976323e-05, "loss": 0.0415, "step": 5590 }, { "epoch": 1.31, "learning_rate": 1.2485229612602052e-05, "loss": 0.0107, "step": 5591 }, { "epoch": 1.31, "learning_rate": 1.2482774685815063e-05, "loss": 0.0634, "step": 5592 }, { "epoch": 1.31, "learning_rate": 1.2480319599559934e-05, "loss": 0.0347, "step": 5593 }, { "epoch": 1.31, "learning_rate": 1.2477864353994362e-05, "loss": 0.0598, "step": 5594 }, { "epoch": 1.31, "learning_rate": 1.2475408949276042e-05, "loss": 0.0535, "step": 5595 }, { "epoch": 1.31, "learning_rate": 1.247295338556269e-05, "loss": 0.0906, "step": 5596 }, { "epoch": 1.31, "learning_rate": 1.2470497663012018e-05, "loss": 0.0427, "step": 5597 }, { "epoch": 1.31, "learning_rate": 1.2468041781781763e-05, "loss": 0.0211, "step": 5598 }, { "epoch": 1.31, "learning_rate": 1.2465585742029665e-05, "loss": 0.0228, "step": 5599 }, { "epoch": 1.31, "learning_rate": 1.2463129543913473e-05, "loss": 0.0471, "step": 5600 }, { "epoch": 1.31, "learning_rate": 1.2460673187590949e-05, "loss": 0.0434, "step": 5601 }, { "epoch": 1.31, "learning_rate": 1.2458216673219865e-05, "loss": 0.0268, "step": 5602 }, { "epoch": 1.31, "learning_rate": 1.2455760000958002e-05, "loss": 0.0094, "step": 5603 }, { "epoch": 1.32, "learning_rate": 1.2453303170963151e-05, "loss": 0.1069, "step": 5604 }, { "epoch": 1.32, "learning_rate": 1.2450846183393113e-05, "loss": 0.0235, "step": 5605 }, { "epoch": 1.32, "learning_rate": 1.2448389038405705e-05, "loss": 0.0019, "step": 5606 }, { "epoch": 1.32, "learning_rate": 1.2445931736158739e-05, "loss": 0.053, "step": 5607 }, { "epoch": 1.32, "learning_rate": 1.2443474276810055e-05, "loss": 0.0423, "step": 5608 }, { "epoch": 1.32, "learning_rate": 1.2441016660517494e-05, "loss": 0.0226, "step": 5609 }, { "epoch": 1.32, "learning_rate": 1.2438558887438907e-05, "loss": 0.1286, "step": 5610 }, { "epoch": 1.32, "learning_rate": 1.2436100957732155e-05, "loss": 0.0237, "step": 5611 }, { "epoch": 1.32, "learning_rate": 1.2433642871555116e-05, "loss": 0.0241, "step": 5612 }, { "epoch": 1.32, "learning_rate": 1.2431184629065664e-05, "loss": 0.0139, "step": 5613 }, { "epoch": 1.32, "learning_rate": 1.2428726230421701e-05, "loss": 0.0742, "step": 5614 }, { "epoch": 1.32, "learning_rate": 1.242626767578112e-05, "loss": 0.0437, "step": 5615 }, { "epoch": 1.32, "learning_rate": 1.2423808965301838e-05, "loss": 0.0644, "step": 5616 }, { "epoch": 1.32, "learning_rate": 1.2421350099141779e-05, "loss": 0.0183, "step": 5617 }, { "epoch": 1.32, "learning_rate": 1.2418891077458872e-05, "loss": 0.0281, "step": 5618 }, { "epoch": 1.32, "learning_rate": 1.2416431900411061e-05, "loss": 0.0211, "step": 5619 }, { "epoch": 1.32, "learning_rate": 1.2413972568156301e-05, "loss": 0.0099, "step": 5620 }, { "epoch": 1.32, "learning_rate": 1.241151308085255e-05, "loss": 0.0378, "step": 5621 }, { "epoch": 1.32, "learning_rate": 1.2409053438657783e-05, "loss": 0.0367, "step": 5622 }, { "epoch": 1.32, "learning_rate": 1.2406593641729982e-05, "loss": 0.0659, "step": 5623 }, { "epoch": 1.32, "learning_rate": 1.240413369022714e-05, "loss": 0.0506, "step": 5624 }, { "epoch": 1.32, "learning_rate": 1.2401673584307254e-05, "loss": 0.0249, "step": 5625 }, { "epoch": 1.32, "learning_rate": 1.2399213324128345e-05, "loss": 0.0304, "step": 5626 }, { "epoch": 1.32, "learning_rate": 1.2396752909848428e-05, "loss": 0.0311, "step": 5627 }, { "epoch": 1.32, "learning_rate": 1.239429234162554e-05, "loss": 0.0468, "step": 5628 }, { "epoch": 1.32, "learning_rate": 1.2391831619617718e-05, "loss": 0.0145, "step": 5629 }, { "epoch": 1.32, "learning_rate": 1.2389370743983019e-05, "loss": 0.0266, "step": 5630 }, { "epoch": 1.32, "learning_rate": 1.2386909714879498e-05, "loss": 0.0077, "step": 5631 }, { "epoch": 1.32, "learning_rate": 1.2384448532465232e-05, "loss": 0.0197, "step": 5632 }, { "epoch": 1.32, "learning_rate": 1.23819871968983e-05, "loss": 0.0596, "step": 5633 }, { "epoch": 1.32, "learning_rate": 1.237952570833679e-05, "loss": 0.0573, "step": 5634 }, { "epoch": 1.32, "learning_rate": 1.2377064066938808e-05, "loss": 0.0531, "step": 5635 }, { "epoch": 1.32, "learning_rate": 1.2374602272862466e-05, "loss": 0.0048, "step": 5636 }, { "epoch": 1.32, "learning_rate": 1.2372140326265877e-05, "loss": 0.0375, "step": 5637 }, { "epoch": 1.32, "learning_rate": 1.2369678227307181e-05, "loss": 0.0325, "step": 5638 }, { "epoch": 1.32, "learning_rate": 1.236721597614451e-05, "loss": 0.0177, "step": 5639 }, { "epoch": 1.32, "learning_rate": 1.2364753572936017e-05, "loss": 0.0518, "step": 5640 }, { "epoch": 1.32, "learning_rate": 1.236229101783986e-05, "loss": 0.0266, "step": 5641 }, { "epoch": 1.32, "learning_rate": 1.2359828311014213e-05, "loss": 0.033, "step": 5642 }, { "epoch": 1.32, "learning_rate": 1.235736545261725e-05, "loss": 0.0582, "step": 5643 }, { "epoch": 1.32, "learning_rate": 1.2354902442807164e-05, "loss": 0.0663, "step": 5644 }, { "epoch": 1.32, "learning_rate": 1.2352439281742151e-05, "loss": 0.0076, "step": 5645 }, { "epoch": 1.33, "learning_rate": 1.2349975969580419e-05, "loss": 0.0408, "step": 5646 }, { "epoch": 1.33, "learning_rate": 1.2347512506480187e-05, "loss": 0.0801, "step": 5647 }, { "epoch": 1.33, "learning_rate": 1.2345048892599687e-05, "loss": 0.0057, "step": 5648 }, { "epoch": 1.33, "learning_rate": 1.2342585128097148e-05, "loss": 0.0441, "step": 5649 }, { "epoch": 1.33, "learning_rate": 1.2340121213130825e-05, "loss": 0.0531, "step": 5650 }, { "epoch": 1.33, "learning_rate": 1.2337657147858965e-05, "loss": 0.1243, "step": 5651 }, { "epoch": 1.33, "learning_rate": 1.2335192932439845e-05, "loss": 0.0383, "step": 5652 }, { "epoch": 1.33, "learning_rate": 1.2332728567031737e-05, "loss": 0.0094, "step": 5653 }, { "epoch": 1.33, "learning_rate": 1.2330264051792928e-05, "loss": 0.0369, "step": 5654 }, { "epoch": 1.33, "learning_rate": 1.2327799386881709e-05, "loss": 0.01, "step": 5655 }, { "epoch": 1.33, "learning_rate": 1.2325334572456389e-05, "loss": 0.019, "step": 5656 }, { "epoch": 1.33, "learning_rate": 1.2322869608675281e-05, "loss": 0.016, "step": 5657 }, { "epoch": 1.33, "learning_rate": 1.2320404495696712e-05, "loss": 0.0314, "step": 5658 }, { "epoch": 1.33, "learning_rate": 1.2317939233679008e-05, "loss": 0.0534, "step": 5659 }, { "epoch": 1.33, "learning_rate": 1.2315473822780519e-05, "loss": 0.0361, "step": 5660 }, { "epoch": 1.33, "learning_rate": 1.2313008263159598e-05, "loss": 0.0291, "step": 5661 }, { "epoch": 1.33, "learning_rate": 1.2310542554974606e-05, "loss": 0.0471, "step": 5662 }, { "epoch": 1.33, "learning_rate": 1.2308076698383916e-05, "loss": 0.0763, "step": 5663 }, { "epoch": 1.33, "learning_rate": 1.2305610693545907e-05, "loss": 0.0481, "step": 5664 }, { "epoch": 1.33, "learning_rate": 1.2303144540618968e-05, "loss": 0.0864, "step": 5665 }, { "epoch": 1.33, "learning_rate": 1.2300678239761507e-05, "loss": 0.0889, "step": 5666 }, { "epoch": 1.33, "learning_rate": 1.2298211791131926e-05, "loss": 0.0818, "step": 5667 }, { "epoch": 1.33, "learning_rate": 1.229574519488865e-05, "loss": 0.0509, "step": 5668 }, { "epoch": 1.33, "learning_rate": 1.2293278451190107e-05, "loss": 0.0216, "step": 5669 }, { "epoch": 1.33, "learning_rate": 1.2290811560194736e-05, "loss": 0.0725, "step": 5670 }, { "epoch": 1.33, "learning_rate": 1.2288344522060982e-05, "loss": 0.0471, "step": 5671 }, { "epoch": 1.33, "learning_rate": 1.2285877336947305e-05, "loss": 0.0522, "step": 5672 }, { "epoch": 1.33, "learning_rate": 1.2283410005012169e-05, "loss": 0.0483, "step": 5673 }, { "epoch": 1.33, "learning_rate": 1.2280942526414054e-05, "loss": 0.0499, "step": 5674 }, { "epoch": 1.33, "learning_rate": 1.2278474901311444e-05, "loss": 0.0266, "step": 5675 }, { "epoch": 1.33, "learning_rate": 1.2276007129862834e-05, "loss": 0.016, "step": 5676 }, { "epoch": 1.33, "learning_rate": 1.2273539212226724e-05, "loss": 0.0295, "step": 5677 }, { "epoch": 1.33, "learning_rate": 1.2271071148561638e-05, "loss": 0.0204, "step": 5678 }, { "epoch": 1.33, "learning_rate": 1.2268602939026089e-05, "loss": 0.0037, "step": 5679 }, { "epoch": 1.33, "learning_rate": 1.226613458377862e-05, "loss": 0.0259, "step": 5680 }, { "epoch": 1.33, "learning_rate": 1.2263666082977761e-05, "loss": 0.0036, "step": 5681 }, { "epoch": 1.33, "learning_rate": 1.2261197436782072e-05, "loss": 0.1222, "step": 5682 }, { "epoch": 1.33, "learning_rate": 1.2258728645350111e-05, "loss": 0.0438, "step": 5683 }, { "epoch": 1.33, "learning_rate": 1.225625970884045e-05, "loss": 0.0037, "step": 5684 }, { "epoch": 1.33, "learning_rate": 1.2253790627411662e-05, "loss": 0.0087, "step": 5685 }, { "epoch": 1.33, "learning_rate": 1.2251321401222343e-05, "loss": 0.084, "step": 5686 }, { "epoch": 1.33, "learning_rate": 1.2248852030431086e-05, "loss": 0.0212, "step": 5687 }, { "epoch": 1.33, "learning_rate": 1.2246382515196504e-05, "loss": 0.0419, "step": 5688 }, { "epoch": 1.34, "learning_rate": 1.2243912855677204e-05, "loss": 0.0662, "step": 5689 }, { "epoch": 1.34, "learning_rate": 1.2241443052031823e-05, "loss": 0.0052, "step": 5690 }, { "epoch": 1.34, "learning_rate": 1.2238973104418988e-05, "loss": 0.0346, "step": 5691 }, { "epoch": 1.34, "learning_rate": 1.2236503012997347e-05, "loss": 0.0334, "step": 5692 }, { "epoch": 1.34, "learning_rate": 1.223403277792555e-05, "loss": 0.0112, "step": 5693 }, { "epoch": 1.34, "learning_rate": 1.223156239936226e-05, "loss": 0.0073, "step": 5694 }, { "epoch": 1.34, "learning_rate": 1.2229091877466153e-05, "loss": 0.0402, "step": 5695 }, { "epoch": 1.34, "learning_rate": 1.222662121239591e-05, "loss": 0.0113, "step": 5696 }, { "epoch": 1.34, "learning_rate": 1.2224150404310217e-05, "loss": 0.0025, "step": 5697 }, { "epoch": 1.34, "learning_rate": 1.222167945336778e-05, "loss": 0.0969, "step": 5698 }, { "epoch": 1.34, "learning_rate": 1.2219208359727297e-05, "loss": 0.0243, "step": 5699 }, { "epoch": 1.34, "learning_rate": 1.2216737123547495e-05, "loss": 0.0729, "step": 5700 }, { "epoch": 1.34, "learning_rate": 1.2214265744987099e-05, "loss": 0.0391, "step": 5701 }, { "epoch": 1.34, "learning_rate": 1.2211794224204845e-05, "loss": 0.0932, "step": 5702 }, { "epoch": 1.34, "learning_rate": 1.2209322561359473e-05, "loss": 0.0721, "step": 5703 }, { "epoch": 1.34, "learning_rate": 1.2206850756609749e-05, "loss": 0.0261, "step": 5704 }, { "epoch": 1.34, "learning_rate": 1.2204378810114426e-05, "loss": 0.1334, "step": 5705 }, { "epoch": 1.34, "learning_rate": 1.2201906722032283e-05, "loss": 0.049, "step": 5706 }, { "epoch": 1.34, "learning_rate": 1.2199434492522095e-05, "loss": 0.0225, "step": 5707 }, { "epoch": 1.34, "learning_rate": 1.2196962121742661e-05, "loss": 0.0305, "step": 5708 }, { "epoch": 1.34, "learning_rate": 1.2194489609852775e-05, "loss": 0.0076, "step": 5709 }, { "epoch": 1.34, "learning_rate": 1.219201695701125e-05, "loss": 0.0068, "step": 5710 }, { "epoch": 1.34, "learning_rate": 1.2189544163376897e-05, "loss": 0.0306, "step": 5711 }, { "epoch": 1.34, "learning_rate": 1.2187071229108554e-05, "loss": 0.0155, "step": 5712 }, { "epoch": 1.34, "learning_rate": 1.2184598154365046e-05, "loss": 0.0386, "step": 5713 }, { "epoch": 1.34, "learning_rate": 1.2182124939305227e-05, "loss": 0.0641, "step": 5714 }, { "epoch": 1.34, "learning_rate": 1.2179651584087944e-05, "loss": 0.0492, "step": 5715 }, { "epoch": 1.34, "learning_rate": 1.2177178088872066e-05, "loss": 0.0361, "step": 5716 }, { "epoch": 1.34, "learning_rate": 1.217470445381646e-05, "loss": 0.0482, "step": 5717 }, { "epoch": 1.34, "learning_rate": 1.2172230679080013e-05, "loss": 0.0493, "step": 5718 }, { "epoch": 1.34, "learning_rate": 1.2169756764821608e-05, "loss": 0.0368, "step": 5719 }, { "epoch": 1.34, "learning_rate": 1.2167282711200147e-05, "loss": 0.0761, "step": 5720 }, { "epoch": 1.34, "learning_rate": 1.2164808518374542e-05, "loss": 0.0064, "step": 5721 }, { "epoch": 1.34, "learning_rate": 1.2162334186503706e-05, "loss": 0.0624, "step": 5722 }, { "epoch": 1.34, "learning_rate": 1.2159859715746565e-05, "loss": 0.064, "step": 5723 }, { "epoch": 1.34, "learning_rate": 1.2157385106262054e-05, "loss": 0.0363, "step": 5724 }, { "epoch": 1.34, "learning_rate": 1.2154910358209117e-05, "loss": 0.0122, "step": 5725 }, { "epoch": 1.34, "learning_rate": 1.2152435471746709e-05, "loss": 0.0107, "step": 5726 }, { "epoch": 1.34, "learning_rate": 1.2149960447033784e-05, "loss": 0.054, "step": 5727 }, { "epoch": 1.34, "learning_rate": 1.214748528422932e-05, "loss": 0.0179, "step": 5728 }, { "epoch": 1.34, "learning_rate": 1.2145009983492292e-05, "loss": 0.0096, "step": 5729 }, { "epoch": 1.34, "learning_rate": 1.2142534544981692e-05, "loss": 0.0665, "step": 5730 }, { "epoch": 1.35, "learning_rate": 1.2140058968856512e-05, "loss": 0.0274, "step": 5731 }, { "epoch": 1.35, "learning_rate": 1.2137583255275765e-05, "loss": 0.0157, "step": 5732 }, { "epoch": 1.35, "learning_rate": 1.2135107404398456e-05, "loss": 0.0182, "step": 5733 }, { "epoch": 1.35, "learning_rate": 1.2132631416383615e-05, "loss": 0.0366, "step": 5734 }, { "epoch": 1.35, "learning_rate": 1.2130155291390274e-05, "loss": 0.0819, "step": 5735 }, { "epoch": 1.35, "learning_rate": 1.2127679029577472e-05, "loss": 0.0183, "step": 5736 }, { "epoch": 1.35, "learning_rate": 1.2125202631104257e-05, "loss": 0.0076, "step": 5737 }, { "epoch": 1.35, "learning_rate": 1.212272609612969e-05, "loss": 0.0018, "step": 5738 }, { "epoch": 1.35, "learning_rate": 1.2120249424812843e-05, "loss": 0.0348, "step": 5739 }, { "epoch": 1.35, "learning_rate": 1.2117772617312783e-05, "loss": 0.0349, "step": 5740 }, { "epoch": 1.35, "learning_rate": 1.2115295673788601e-05, "loss": 0.0397, "step": 5741 }, { "epoch": 1.35, "learning_rate": 1.2112818594399392e-05, "loss": 0.0291, "step": 5742 }, { "epoch": 1.35, "learning_rate": 1.2110341379304251e-05, "loss": 0.0811, "step": 5743 }, { "epoch": 1.35, "learning_rate": 1.2107864028662295e-05, "loss": 0.0756, "step": 5744 }, { "epoch": 1.35, "learning_rate": 1.2105386542632643e-05, "loss": 0.0125, "step": 5745 }, { "epoch": 1.35, "learning_rate": 1.210290892137442e-05, "loss": 0.0721, "step": 5746 }, { "epoch": 1.35, "learning_rate": 1.2100431165046767e-05, "loss": 0.0266, "step": 5747 }, { "epoch": 1.35, "learning_rate": 1.209795327380883e-05, "loss": 0.023, "step": 5748 }, { "epoch": 1.35, "learning_rate": 1.209547524781976e-05, "loss": 0.038, "step": 5749 }, { "epoch": 1.35, "learning_rate": 1.2092997087238723e-05, "loss": 0.0339, "step": 5750 }, { "epoch": 1.35, "learning_rate": 1.2090518792224887e-05, "loss": 0.0499, "step": 5751 }, { "epoch": 1.35, "learning_rate": 1.2088040362937439e-05, "loss": 0.0144, "step": 5752 }, { "epoch": 1.35, "learning_rate": 1.2085561799535563e-05, "loss": 0.0407, "step": 5753 }, { "epoch": 1.35, "learning_rate": 1.2083083102178457e-05, "loss": 0.0847, "step": 5754 }, { "epoch": 1.35, "learning_rate": 1.2080604271025329e-05, "loss": 0.0041, "step": 5755 }, { "epoch": 1.35, "learning_rate": 1.2078125306235393e-05, "loss": 0.0006, "step": 5756 }, { "epoch": 1.35, "learning_rate": 1.2075646207967872e-05, "loss": 0.0114, "step": 5757 }, { "epoch": 1.35, "learning_rate": 1.2073166976382e-05, "loss": 0.0166, "step": 5758 }, { "epoch": 1.35, "learning_rate": 1.2070687611637015e-05, "loss": 0.0081, "step": 5759 }, { "epoch": 1.35, "learning_rate": 1.2068208113892168e-05, "loss": 0.0511, "step": 5760 }, { "epoch": 1.35, "learning_rate": 1.2065728483306715e-05, "loss": 0.0044, "step": 5761 }, { "epoch": 1.35, "learning_rate": 1.2063248720039925e-05, "loss": 0.0029, "step": 5762 }, { "epoch": 1.35, "learning_rate": 1.2060768824251063e-05, "loss": 0.0643, "step": 5763 }, { "epoch": 1.35, "learning_rate": 1.205828879609943e-05, "loss": 0.0167, "step": 5764 }, { "epoch": 1.35, "learning_rate": 1.2055808635744301e-05, "loss": 0.0242, "step": 5765 }, { "epoch": 1.35, "learning_rate": 1.2053328343344985e-05, "loss": 0.002, "step": 5766 }, { "epoch": 1.35, "learning_rate": 1.2050847919060789e-05, "loss": 0.001, "step": 5767 }, { "epoch": 1.35, "learning_rate": 1.204836736305103e-05, "loss": 0.0692, "step": 5768 }, { "epoch": 1.35, "learning_rate": 1.2045886675475034e-05, "loss": 0.0175, "step": 5769 }, { "epoch": 1.35, "learning_rate": 1.2043405856492134e-05, "loss": 0.0447, "step": 5770 }, { "epoch": 1.35, "learning_rate": 1.2040924906261668e-05, "loss": 0.0598, "step": 5771 }, { "epoch": 1.35, "learning_rate": 1.2038443824942998e-05, "loss": 0.0711, "step": 5772 }, { "epoch": 1.35, "learning_rate": 1.2035962612695476e-05, "loss": 0.0023, "step": 5773 }, { "epoch": 1.36, "learning_rate": 1.203348126967847e-05, "loss": 0.0091, "step": 5774 }, { "epoch": 1.36, "learning_rate": 1.2030999796051357e-05, "loss": 0.0602, "step": 5775 }, { "epoch": 1.36, "learning_rate": 1.2028518191973523e-05, "loss": 0.0644, "step": 5776 }, { "epoch": 1.36, "learning_rate": 1.2026036457604359e-05, "loss": 0.0018, "step": 5777 }, { "epoch": 1.36, "learning_rate": 1.2023554593103268e-05, "loss": 0.0016, "step": 5778 }, { "epoch": 1.36, "learning_rate": 1.2021072598629657e-05, "loss": 0.0444, "step": 5779 }, { "epoch": 1.36, "learning_rate": 1.2018590474342948e-05, "loss": 0.0497, "step": 5780 }, { "epoch": 1.36, "learning_rate": 1.2016108220402562e-05, "loss": 0.0473, "step": 5781 }, { "epoch": 1.36, "learning_rate": 1.201362583696794e-05, "loss": 0.0408, "step": 5782 }, { "epoch": 1.36, "learning_rate": 1.201114332419852e-05, "loss": 0.0709, "step": 5783 }, { "epoch": 1.36, "learning_rate": 1.2008660682253756e-05, "loss": 0.0476, "step": 5784 }, { "epoch": 1.36, "learning_rate": 1.200617791129311e-05, "loss": 0.0031, "step": 5785 }, { "epoch": 1.36, "learning_rate": 1.2003695011476042e-05, "loss": 0.0525, "step": 5786 }, { "epoch": 1.36, "learning_rate": 1.2001211982962033e-05, "loss": 0.0286, "step": 5787 }, { "epoch": 1.36, "learning_rate": 1.1998728825910569e-05, "loss": 0.029, "step": 5788 }, { "epoch": 1.36, "learning_rate": 1.1996245540481138e-05, "loss": 0.0315, "step": 5789 }, { "epoch": 1.36, "learning_rate": 1.1993762126833248e-05, "loss": 0.005, "step": 5790 }, { "epoch": 1.36, "learning_rate": 1.19912785851264e-05, "loss": 0.0255, "step": 5791 }, { "epoch": 1.36, "learning_rate": 1.1988794915520121e-05, "loss": 0.0275, "step": 5792 }, { "epoch": 1.36, "learning_rate": 1.1986311118173926e-05, "loss": 0.0199, "step": 5793 }, { "epoch": 1.36, "learning_rate": 1.1983827193247358e-05, "loss": 0.0751, "step": 5794 }, { "epoch": 1.36, "learning_rate": 1.198134314089995e-05, "loss": 0.0487, "step": 5795 }, { "epoch": 1.36, "learning_rate": 1.1978858961291263e-05, "loss": 0.0268, "step": 5796 }, { "epoch": 1.36, "learning_rate": 1.1976374654580843e-05, "loss": 0.0135, "step": 5797 }, { "epoch": 1.36, "learning_rate": 1.1973890220928266e-05, "loss": 0.0044, "step": 5798 }, { "epoch": 1.36, "learning_rate": 1.1971405660493105e-05, "loss": 0.0227, "step": 5799 }, { "epoch": 1.36, "learning_rate": 1.1968920973434942e-05, "loss": 0.0138, "step": 5800 }, { "epoch": 1.36, "learning_rate": 1.1966436159913364e-05, "loss": 0.0945, "step": 5801 }, { "epoch": 1.36, "learning_rate": 1.1963951220087978e-05, "loss": 0.0132, "step": 5802 }, { "epoch": 1.36, "learning_rate": 1.1961466154118384e-05, "loss": 0.1186, "step": 5803 }, { "epoch": 1.36, "learning_rate": 1.1958980962164202e-05, "loss": 0.0192, "step": 5804 }, { "epoch": 1.36, "learning_rate": 1.195649564438505e-05, "loss": 0.0116, "step": 5805 }, { "epoch": 1.36, "learning_rate": 1.1954010200940564e-05, "loss": 0.0506, "step": 5806 }, { "epoch": 1.36, "learning_rate": 1.1951524631990384e-05, "loss": 0.0259, "step": 5807 }, { "epoch": 1.36, "learning_rate": 1.1949038937694157e-05, "loss": 0.041, "step": 5808 }, { "epoch": 1.36, "learning_rate": 1.1946553118211537e-05, "loss": 0.0337, "step": 5809 }, { "epoch": 1.36, "learning_rate": 1.194406717370219e-05, "loss": 0.0277, "step": 5810 }, { "epoch": 1.36, "learning_rate": 1.1941581104325783e-05, "loss": 0.0186, "step": 5811 }, { "epoch": 1.36, "learning_rate": 1.1939094910242003e-05, "loss": 0.0234, "step": 5812 }, { "epoch": 1.36, "learning_rate": 1.193660859161053e-05, "loss": 0.0439, "step": 5813 }, { "epoch": 1.36, "learning_rate": 1.1934122148591063e-05, "loss": 0.0405, "step": 5814 }, { "epoch": 1.36, "learning_rate": 1.1931635581343308e-05, "loss": 0.0235, "step": 5815 }, { "epoch": 1.36, "learning_rate": 1.1929148890026976e-05, "loss": 0.0288, "step": 5816 }, { "epoch": 1.37, "learning_rate": 1.1926662074801784e-05, "loss": 0.0063, "step": 5817 }, { "epoch": 1.37, "learning_rate": 1.1924175135827462e-05, "loss": 0.0369, "step": 5818 }, { "epoch": 1.37, "learning_rate": 1.1921688073263746e-05, "loss": 0.053, "step": 5819 }, { "epoch": 1.37, "learning_rate": 1.191920088727038e-05, "loss": 0.1118, "step": 5820 }, { "epoch": 1.37, "learning_rate": 1.1916713578007109e-05, "loss": 0.1628, "step": 5821 }, { "epoch": 1.37, "learning_rate": 1.1914226145633703e-05, "loss": 0.0596, "step": 5822 }, { "epoch": 1.37, "learning_rate": 1.1911738590309918e-05, "loss": 0.0237, "step": 5823 }, { "epoch": 1.37, "learning_rate": 1.1909250912195539e-05, "loss": 0.0399, "step": 5824 }, { "epoch": 1.37, "learning_rate": 1.1906763111450342e-05, "loss": 0.0748, "step": 5825 }, { "epoch": 1.37, "learning_rate": 1.1904275188234125e-05, "loss": 0.0189, "step": 5826 }, { "epoch": 1.37, "learning_rate": 1.1901787142706677e-05, "loss": 0.038, "step": 5827 }, { "epoch": 1.37, "learning_rate": 1.1899298975027817e-05, "loss": 0.0283, "step": 5828 }, { "epoch": 1.37, "learning_rate": 1.1896810685357348e-05, "loss": 0.0308, "step": 5829 }, { "epoch": 1.37, "learning_rate": 1.18943222738551e-05, "loss": 0.0271, "step": 5830 }, { "epoch": 1.37, "learning_rate": 1.18918337406809e-05, "loss": 0.0766, "step": 5831 }, { "epoch": 1.37, "learning_rate": 1.1889345085994584e-05, "loss": 0.0892, "step": 5832 }, { "epoch": 1.37, "learning_rate": 1.1886856309956002e-05, "loss": 0.076, "step": 5833 }, { "epoch": 1.37, "learning_rate": 1.1884367412725009e-05, "loss": 0.0425, "step": 5834 }, { "epoch": 1.37, "learning_rate": 1.1881878394461458e-05, "loss": 0.0215, "step": 5835 }, { "epoch": 1.37, "learning_rate": 1.1879389255325229e-05, "loss": 0.0343, "step": 5836 }, { "epoch": 1.37, "learning_rate": 1.1876899995476188e-05, "loss": 0.0166, "step": 5837 }, { "epoch": 1.37, "learning_rate": 1.187441061507423e-05, "loss": 0.0583, "step": 5838 }, { "epoch": 1.37, "learning_rate": 1.1871921114279239e-05, "loss": 0.0172, "step": 5839 }, { "epoch": 1.37, "learning_rate": 1.1869431493251117e-05, "loss": 0.0157, "step": 5840 }, { "epoch": 1.37, "learning_rate": 1.1866941752149777e-05, "loss": 0.0074, "step": 5841 }, { "epoch": 1.37, "learning_rate": 1.1864451891135131e-05, "loss": 0.0209, "step": 5842 }, { "epoch": 1.37, "learning_rate": 1.1861961910367101e-05, "loss": 0.0351, "step": 5843 }, { "epoch": 1.37, "learning_rate": 1.185947181000562e-05, "loss": 0.05, "step": 5844 }, { "epoch": 1.37, "learning_rate": 1.1856981590210626e-05, "loss": 0.0259, "step": 5845 }, { "epoch": 1.37, "learning_rate": 1.1854491251142066e-05, "loss": 0.0409, "step": 5846 }, { "epoch": 1.37, "learning_rate": 1.1852000792959893e-05, "loss": 0.0152, "step": 5847 }, { "epoch": 1.37, "learning_rate": 1.1849510215824069e-05, "loss": 0.0045, "step": 5848 }, { "epoch": 1.37, "learning_rate": 1.1847019519894563e-05, "loss": 0.1051, "step": 5849 }, { "epoch": 1.37, "learning_rate": 1.184452870533135e-05, "loss": 0.0392, "step": 5850 }, { "epoch": 1.37, "learning_rate": 1.184203777229442e-05, "loss": 0.0064, "step": 5851 }, { "epoch": 1.37, "learning_rate": 1.1839546720943762e-05, "loss": 0.0514, "step": 5852 }, { "epoch": 1.37, "learning_rate": 1.1837055551439373e-05, "loss": 0.0145, "step": 5853 }, { "epoch": 1.37, "learning_rate": 1.1834564263941267e-05, "loss": 0.0178, "step": 5854 }, { "epoch": 1.37, "learning_rate": 1.1832072858609451e-05, "loss": 0.0134, "step": 5855 }, { "epoch": 1.37, "learning_rate": 1.1829581335603953e-05, "loss": 0.023, "step": 5856 }, { "epoch": 1.37, "learning_rate": 1.1827089695084802e-05, "loss": 0.0538, "step": 5857 }, { "epoch": 1.37, "learning_rate": 1.1824597937212033e-05, "loss": 0.035, "step": 5858 }, { "epoch": 1.38, "learning_rate": 1.182210606214569e-05, "loss": 0.0253, "step": 5859 }, { "epoch": 1.38, "learning_rate": 1.1819614070045835e-05, "loss": 0.0535, "step": 5860 }, { "epoch": 1.38, "learning_rate": 1.181712196107252e-05, "loss": 0.0094, "step": 5861 }, { "epoch": 1.38, "learning_rate": 1.1814629735385814e-05, "loss": 0.0467, "step": 5862 }, { "epoch": 1.38, "learning_rate": 1.1812137393145793e-05, "loss": 0.0386, "step": 5863 }, { "epoch": 1.38, "learning_rate": 1.1809644934512541e-05, "loss": 0.004, "step": 5864 }, { "epoch": 1.38, "learning_rate": 1.1807152359646144e-05, "loss": 0.0862, "step": 5865 }, { "epoch": 1.38, "learning_rate": 1.1804659668706701e-05, "loss": 0.0448, "step": 5866 }, { "epoch": 1.38, "learning_rate": 1.1802166861854324e-05, "loss": 0.0944, "step": 5867 }, { "epoch": 1.38, "learning_rate": 1.1799673939249115e-05, "loss": 0.0694, "step": 5868 }, { "epoch": 1.38, "learning_rate": 1.1797180901051198e-05, "loss": 0.0141, "step": 5869 }, { "epoch": 1.38, "learning_rate": 1.1794687747420706e-05, "loss": 0.0057, "step": 5870 }, { "epoch": 1.38, "learning_rate": 1.1792194478517762e-05, "loss": 0.0066, "step": 5871 }, { "epoch": 1.38, "learning_rate": 1.1789701094502522e-05, "loss": 0.0319, "step": 5872 }, { "epoch": 1.38, "learning_rate": 1.1787207595535123e-05, "loss": 0.0666, "step": 5873 }, { "epoch": 1.38, "learning_rate": 1.1784713981775728e-05, "loss": 0.0206, "step": 5874 }, { "epoch": 1.38, "learning_rate": 1.1782220253384504e-05, "loss": 0.0624, "step": 5875 }, { "epoch": 1.38, "learning_rate": 1.1779726410521617e-05, "loss": 0.0952, "step": 5876 }, { "epoch": 1.38, "learning_rate": 1.1777232453347252e-05, "loss": 0.0141, "step": 5877 }, { "epoch": 1.38, "learning_rate": 1.1774738382021586e-05, "loss": 0.0116, "step": 5878 }, { "epoch": 1.38, "learning_rate": 1.1772244196704822e-05, "loss": 0.1039, "step": 5879 }, { "epoch": 1.38, "learning_rate": 1.1769749897557159e-05, "loss": 0.0016, "step": 5880 }, { "epoch": 1.38, "learning_rate": 1.1767255484738802e-05, "loss": 0.0218, "step": 5881 }, { "epoch": 1.38, "learning_rate": 1.176476095840997e-05, "loss": 0.0528, "step": 5882 }, { "epoch": 1.38, "learning_rate": 1.1762266318730884e-05, "loss": 0.006, "step": 5883 }, { "epoch": 1.38, "learning_rate": 1.1759771565861773e-05, "loss": 0.1653, "step": 5884 }, { "epoch": 1.38, "learning_rate": 1.175727669996288e-05, "loss": 0.0499, "step": 5885 }, { "epoch": 1.38, "learning_rate": 1.1754781721194444e-05, "loss": 0.0032, "step": 5886 }, { "epoch": 1.38, "learning_rate": 1.1752286629716717e-05, "loss": 0.033, "step": 5887 }, { "epoch": 1.38, "learning_rate": 1.1749791425689965e-05, "loss": 0.0342, "step": 5888 }, { "epoch": 1.38, "learning_rate": 1.1747296109274444e-05, "loss": 0.0245, "step": 5889 }, { "epoch": 1.38, "learning_rate": 1.174480068063044e-05, "loss": 0.0211, "step": 5890 }, { "epoch": 1.38, "learning_rate": 1.1742305139918221e-05, "loss": 0.0129, "step": 5891 }, { "epoch": 1.38, "learning_rate": 1.1739809487298085e-05, "loss": 0.0749, "step": 5892 }, { "epoch": 1.38, "learning_rate": 1.1737313722930326e-05, "loss": 0.063, "step": 5893 }, { "epoch": 1.38, "learning_rate": 1.1734817846975239e-05, "loss": 0.0137, "step": 5894 }, { "epoch": 1.38, "learning_rate": 1.1732321859593143e-05, "loss": 0.0134, "step": 5895 }, { "epoch": 1.38, "learning_rate": 1.172982576094435e-05, "loss": 0.0421, "step": 5896 }, { "epoch": 1.38, "learning_rate": 1.1727329551189182e-05, "loss": 0.0303, "step": 5897 }, { "epoch": 1.38, "learning_rate": 1.1724833230487975e-05, "loss": 0.0445, "step": 5898 }, { "epoch": 1.38, "learning_rate": 1.1722336799001064e-05, "loss": 0.0698, "step": 5899 }, { "epoch": 1.38, "learning_rate": 1.1719840256888795e-05, "loss": 0.0026, "step": 5900 }, { "epoch": 1.38, "learning_rate": 1.1717343604311522e-05, "loss": 0.0033, "step": 5901 }, { "epoch": 1.39, "learning_rate": 1.1714846841429602e-05, "loss": 0.0158, "step": 5902 }, { "epoch": 1.39, "learning_rate": 1.1712349968403402e-05, "loss": 0.041, "step": 5903 }, { "epoch": 1.39, "learning_rate": 1.1709852985393296e-05, "loss": 0.0043, "step": 5904 }, { "epoch": 1.39, "learning_rate": 1.1707355892559665e-05, "loss": 0.0587, "step": 5905 }, { "epoch": 1.39, "learning_rate": 1.17048586900629e-05, "loss": 0.0113, "step": 5906 }, { "epoch": 1.39, "learning_rate": 1.170236137806339e-05, "loss": 0.0697, "step": 5907 }, { "epoch": 1.39, "learning_rate": 1.169986395672154e-05, "loss": 0.0176, "step": 5908 }, { "epoch": 1.39, "learning_rate": 1.1697366426197756e-05, "loss": 0.0035, "step": 5909 }, { "epoch": 1.39, "learning_rate": 1.1694868786652457e-05, "loss": 0.0055, "step": 5910 }, { "epoch": 1.39, "learning_rate": 1.1692371038246071e-05, "loss": 0.0463, "step": 5911 }, { "epoch": 1.39, "learning_rate": 1.1689873181139015e-05, "loss": 0.028, "step": 5912 }, { "epoch": 1.39, "learning_rate": 1.1687375215491739e-05, "loss": 0.0034, "step": 5913 }, { "epoch": 1.39, "learning_rate": 1.1684877141464676e-05, "loss": 0.0512, "step": 5914 }, { "epoch": 1.39, "learning_rate": 1.1682378959218284e-05, "loss": 0.0155, "step": 5915 }, { "epoch": 1.39, "learning_rate": 1.1679880668913018e-05, "loss": 0.0356, "step": 5916 }, { "epoch": 1.39, "learning_rate": 1.1677382270709341e-05, "loss": 0.0045, "step": 5917 }, { "epoch": 1.39, "learning_rate": 1.1674883764767728e-05, "loss": 0.0337, "step": 5918 }, { "epoch": 1.39, "learning_rate": 1.167238515124866e-05, "loss": 0.0055, "step": 5919 }, { "epoch": 1.39, "learning_rate": 1.1669886430312614e-05, "loss": 0.0586, "step": 5920 }, { "epoch": 1.39, "learning_rate": 1.166738760212009e-05, "loss": 0.0474, "step": 5921 }, { "epoch": 1.39, "learning_rate": 1.1664888666831585e-05, "loss": 0.0087, "step": 5922 }, { "epoch": 1.39, "learning_rate": 1.16623896246076e-05, "loss": 0.0008, "step": 5923 }, { "epoch": 1.39, "learning_rate": 1.1659890475608657e-05, "loss": 0.0141, "step": 5924 }, { "epoch": 1.39, "learning_rate": 1.165739121999527e-05, "loss": 0.0424, "step": 5925 }, { "epoch": 1.39, "learning_rate": 1.1654891857927964e-05, "loss": 0.0153, "step": 5926 }, { "epoch": 1.39, "learning_rate": 1.165239238956728e-05, "loss": 0.0666, "step": 5927 }, { "epoch": 1.39, "learning_rate": 1.1649892815073752e-05, "loss": 0.0547, "step": 5928 }, { "epoch": 1.39, "learning_rate": 1.1647393134607929e-05, "loss": 0.0168, "step": 5929 }, { "epoch": 1.39, "learning_rate": 1.1644893348330367e-05, "loss": 0.0063, "step": 5930 }, { "epoch": 1.39, "learning_rate": 1.1642393456401621e-05, "loss": 0.0371, "step": 5931 }, { "epoch": 1.39, "learning_rate": 1.1639893458982266e-05, "loss": 0.0431, "step": 5932 }, { "epoch": 1.39, "learning_rate": 1.1637393356232871e-05, "loss": 0.0416, "step": 5933 }, { "epoch": 1.39, "learning_rate": 1.163489314831402e-05, "loss": 0.0038, "step": 5934 }, { "epoch": 1.39, "learning_rate": 1.1632392835386299e-05, "loss": 0.0184, "step": 5935 }, { "epoch": 1.39, "learning_rate": 1.1629892417610304e-05, "loss": 0.0858, "step": 5936 }, { "epoch": 1.39, "learning_rate": 1.1627391895146636e-05, "loss": 0.0096, "step": 5937 }, { "epoch": 1.39, "learning_rate": 1.1624891268155903e-05, "loss": 0.0696, "step": 5938 }, { "epoch": 1.39, "learning_rate": 1.162239053679872e-05, "loss": 0.0093, "step": 5939 }, { "epoch": 1.39, "learning_rate": 1.1619889701235704e-05, "loss": 0.0035, "step": 5940 }, { "epoch": 1.39, "learning_rate": 1.1617388761627493e-05, "loss": 0.0126, "step": 5941 }, { "epoch": 1.39, "learning_rate": 1.1614887718134713e-05, "loss": 0.0074, "step": 5942 }, { "epoch": 1.39, "learning_rate": 1.161238657091801e-05, "loss": 0.0237, "step": 5943 }, { "epoch": 1.4, "learning_rate": 1.160988532013803e-05, "loss": 0.0612, "step": 5944 }, { "epoch": 1.4, "learning_rate": 1.160738396595543e-05, "loss": 0.0343, "step": 5945 }, { "epoch": 1.4, "learning_rate": 1.1604882508530869e-05, "loss": 0.0289, "step": 5946 }, { "epoch": 1.4, "learning_rate": 1.1602380948025019e-05, "loss": 0.0478, "step": 5947 }, { "epoch": 1.4, "learning_rate": 1.1599879284598547e-05, "loss": 0.0454, "step": 5948 }, { "epoch": 1.4, "learning_rate": 1.1597377518412145e-05, "loss": 0.0216, "step": 5949 }, { "epoch": 1.4, "learning_rate": 1.159487564962649e-05, "loss": 0.0218, "step": 5950 }, { "epoch": 1.4, "learning_rate": 1.1592373678402285e-05, "loss": 0.0293, "step": 5951 }, { "epoch": 1.4, "learning_rate": 1.1589871604900227e-05, "loss": 0.0387, "step": 5952 }, { "epoch": 1.4, "learning_rate": 1.1587369429281028e-05, "loss": 0.0209, "step": 5953 }, { "epoch": 1.4, "learning_rate": 1.1584867151705394e-05, "loss": 0.0343, "step": 5954 }, { "epoch": 1.4, "learning_rate": 1.1582364772334056e-05, "loss": 0.0685, "step": 5955 }, { "epoch": 1.4, "learning_rate": 1.1579862291327733e-05, "loss": 0.0578, "step": 5956 }, { "epoch": 1.4, "learning_rate": 1.1577359708847165e-05, "loss": 0.0059, "step": 5957 }, { "epoch": 1.4, "learning_rate": 1.1574857025053085e-05, "loss": 0.0177, "step": 5958 }, { "epoch": 1.4, "learning_rate": 1.157235424010625e-05, "loss": 0.0564, "step": 5959 }, { "epoch": 1.4, "learning_rate": 1.1569851354167401e-05, "loss": 0.0062, "step": 5960 }, { "epoch": 1.4, "learning_rate": 1.1567348367397308e-05, "loss": 0.0479, "step": 5961 }, { "epoch": 1.4, "learning_rate": 1.1564845279956733e-05, "loss": 0.0107, "step": 5962 }, { "epoch": 1.4, "learning_rate": 1.1562342092006453e-05, "loss": 0.0223, "step": 5963 }, { "epoch": 1.4, "learning_rate": 1.1559838803707242e-05, "loss": 0.0176, "step": 5964 }, { "epoch": 1.4, "learning_rate": 1.1557335415219889e-05, "loss": 0.0515, "step": 5965 }, { "epoch": 1.4, "learning_rate": 1.1554831926705181e-05, "loss": 0.0496, "step": 5966 }, { "epoch": 1.4, "learning_rate": 1.1552328338323925e-05, "loss": 0.0171, "step": 5967 }, { "epoch": 1.4, "learning_rate": 1.154982465023692e-05, "loss": 0.0058, "step": 5968 }, { "epoch": 1.4, "learning_rate": 1.1547320862604979e-05, "loss": 0.002, "step": 5969 }, { "epoch": 1.4, "learning_rate": 1.1544816975588917e-05, "loss": 0.0272, "step": 5970 }, { "epoch": 1.4, "learning_rate": 1.1542312989349564e-05, "loss": 0.0211, "step": 5971 }, { "epoch": 1.4, "learning_rate": 1.1539808904047747e-05, "loss": 0.0341, "step": 5972 }, { "epoch": 1.4, "learning_rate": 1.1537304719844302e-05, "loss": 0.0131, "step": 5973 }, { "epoch": 1.4, "learning_rate": 1.1534800436900075e-05, "loss": 0.0109, "step": 5974 }, { "epoch": 1.4, "learning_rate": 1.1532296055375916e-05, "loss": 0.0361, "step": 5975 }, { "epoch": 1.4, "learning_rate": 1.1529791575432676e-05, "loss": 0.0437, "step": 5976 }, { "epoch": 1.4, "learning_rate": 1.152728699723122e-05, "loss": 0.0576, "step": 5977 }, { "epoch": 1.4, "learning_rate": 1.1524782320932418e-05, "loss": 0.0588, "step": 5978 }, { "epoch": 1.4, "learning_rate": 1.1522277546697145e-05, "loss": 0.0273, "step": 5979 }, { "epoch": 1.4, "learning_rate": 1.151977267468628e-05, "loss": 0.0116, "step": 5980 }, { "epoch": 1.4, "learning_rate": 1.1517267705060714e-05, "loss": 0.0579, "step": 5981 }, { "epoch": 1.4, "learning_rate": 1.1514762637981334e-05, "loss": 0.0075, "step": 5982 }, { "epoch": 1.4, "learning_rate": 1.1512257473609047e-05, "loss": 0.0355, "step": 5983 }, { "epoch": 1.4, "learning_rate": 1.1509752212104754e-05, "loss": 0.1104, "step": 5984 }, { "epoch": 1.4, "learning_rate": 1.1507246853629372e-05, "loss": 0.0329, "step": 5985 }, { "epoch": 1.4, "learning_rate": 1.1504741398343816e-05, "loss": 0.1052, "step": 5986 }, { "epoch": 1.41, "learning_rate": 1.1502235846409013e-05, "loss": 0.0543, "step": 5987 }, { "epoch": 1.41, "learning_rate": 1.1499730197985893e-05, "loss": 0.0298, "step": 5988 }, { "epoch": 1.41, "learning_rate": 1.1497224453235397e-05, "loss": 0.0272, "step": 5989 }, { "epoch": 1.41, "learning_rate": 1.149471861231846e-05, "loss": 0.0593, "step": 5990 }, { "epoch": 1.41, "learning_rate": 1.1492212675396041e-05, "loss": 0.0434, "step": 5991 }, { "epoch": 1.41, "learning_rate": 1.1489706642629092e-05, "loss": 0.0309, "step": 5992 }, { "epoch": 1.41, "learning_rate": 1.1487200514178576e-05, "loss": 0.0304, "step": 5993 }, { "epoch": 1.41, "learning_rate": 1.1484694290205454e-05, "loss": 0.0244, "step": 5994 }, { "epoch": 1.41, "learning_rate": 1.148218797087071e-05, "loss": 0.0389, "step": 5995 }, { "epoch": 1.41, "learning_rate": 1.147968155633532e-05, "loss": 0.0153, "step": 5996 }, { "epoch": 1.41, "learning_rate": 1.1477175046760273e-05, "loss": 0.0226, "step": 5997 }, { "epoch": 1.41, "learning_rate": 1.1474668442306557e-05, "loss": 0.0149, "step": 5998 }, { "epoch": 1.41, "learning_rate": 1.1472161743135178e-05, "loss": 0.0573, "step": 5999 }, { "epoch": 1.41, "learning_rate": 1.1469654949407134e-05, "loss": 0.0177, "step": 6000 }, { "epoch": 1.41, "learning_rate": 1.1467148061283439e-05, "loss": 0.0297, "step": 6001 }, { "epoch": 1.41, "learning_rate": 1.1464641078925108e-05, "loss": 0.0124, "step": 6002 }, { "epoch": 1.41, "learning_rate": 1.1462134002493163e-05, "loss": 0.031, "step": 6003 }, { "epoch": 1.41, "learning_rate": 1.1459626832148636e-05, "loss": 0.0122, "step": 6004 }, { "epoch": 1.41, "learning_rate": 1.1457119568052564e-05, "loss": 0.0082, "step": 6005 }, { "epoch": 1.41, "learning_rate": 1.1454612210365982e-05, "loss": 0.0092, "step": 6006 }, { "epoch": 1.41, "learning_rate": 1.1452104759249945e-05, "loss": 0.0281, "step": 6007 }, { "epoch": 1.41, "learning_rate": 1.1449597214865497e-05, "loss": 0.0155, "step": 6008 }, { "epoch": 1.41, "learning_rate": 1.1447089577373707e-05, "loss": 0.0067, "step": 6009 }, { "epoch": 1.41, "learning_rate": 1.144458184693563e-05, "loss": 0.0054, "step": 6010 }, { "epoch": 1.41, "learning_rate": 1.1442074023712344e-05, "loss": 0.0079, "step": 6011 }, { "epoch": 1.41, "learning_rate": 1.143956610786492e-05, "loss": 0.0143, "step": 6012 }, { "epoch": 1.41, "learning_rate": 1.1437058099554448e-05, "loss": 0.0159, "step": 6013 }, { "epoch": 1.41, "learning_rate": 1.143454999894201e-05, "loss": 0.0032, "step": 6014 }, { "epoch": 1.41, "learning_rate": 1.1432041806188709e-05, "loss": 0.0169, "step": 6015 }, { "epoch": 1.41, "learning_rate": 1.142953352145564e-05, "loss": 0.0149, "step": 6016 }, { "epoch": 1.41, "learning_rate": 1.1427025144903911e-05, "loss": 0.001, "step": 6017 }, { "epoch": 1.41, "learning_rate": 1.1424516676694633e-05, "loss": 0.0171, "step": 6018 }, { "epoch": 1.41, "learning_rate": 1.1422008116988928e-05, "loss": 0.0161, "step": 6019 }, { "epoch": 1.41, "learning_rate": 1.1419499465947912e-05, "loss": 0.0247, "step": 6020 }, { "epoch": 1.41, "learning_rate": 1.1416990723732726e-05, "loss": 0.041, "step": 6021 }, { "epoch": 1.41, "learning_rate": 1.1414481890504501e-05, "loss": 0.0812, "step": 6022 }, { "epoch": 1.41, "learning_rate": 1.1411972966424378e-05, "loss": 0.0337, "step": 6023 }, { "epoch": 1.41, "learning_rate": 1.1409463951653505e-05, "loss": 0.0084, "step": 6024 }, { "epoch": 1.41, "learning_rate": 1.140695484635304e-05, "loss": 0.0655, "step": 6025 }, { "epoch": 1.41, "learning_rate": 1.1404445650684136e-05, "loss": 0.0137, "step": 6026 }, { "epoch": 1.41, "learning_rate": 1.1401936364807961e-05, "loss": 0.0354, "step": 6027 }, { "epoch": 1.41, "learning_rate": 1.1399426988885684e-05, "loss": 0.0432, "step": 6028 }, { "epoch": 1.41, "learning_rate": 1.1396917523078485e-05, "loss": 0.0029, "step": 6029 }, { "epoch": 1.42, "learning_rate": 1.1394407967547544e-05, "loss": 0.0067, "step": 6030 }, { "epoch": 1.42, "learning_rate": 1.1391898322454053e-05, "loss": 0.0581, "step": 6031 }, { "epoch": 1.42, "learning_rate": 1.1389388587959203e-05, "loss": 0.1056, "step": 6032 }, { "epoch": 1.42, "learning_rate": 1.1386878764224192e-05, "loss": 0.0034, "step": 6033 }, { "epoch": 1.42, "learning_rate": 1.1384368851410229e-05, "loss": 0.0301, "step": 6034 }, { "epoch": 1.42, "learning_rate": 1.1381858849678525e-05, "loss": 0.0715, "step": 6035 }, { "epoch": 1.42, "learning_rate": 1.1379348759190292e-05, "loss": 0.1134, "step": 6036 }, { "epoch": 1.42, "learning_rate": 1.1376838580106756e-05, "loss": 0.0082, "step": 6037 }, { "epoch": 1.42, "learning_rate": 1.1374328312589146e-05, "loss": 0.0087, "step": 6038 }, { "epoch": 1.42, "learning_rate": 1.1371817956798698e-05, "loss": 0.0035, "step": 6039 }, { "epoch": 1.42, "learning_rate": 1.1369307512896648e-05, "loss": 0.053, "step": 6040 }, { "epoch": 1.42, "learning_rate": 1.1366796981044246e-05, "loss": 0.0373, "step": 6041 }, { "epoch": 1.42, "learning_rate": 1.1364286361402735e-05, "loss": 0.0175, "step": 6042 }, { "epoch": 1.42, "learning_rate": 1.1361775654133377e-05, "loss": 0.041, "step": 6043 }, { "epoch": 1.42, "learning_rate": 1.1359264859397434e-05, "loss": 0.0274, "step": 6044 }, { "epoch": 1.42, "learning_rate": 1.1356753977356174e-05, "loss": 0.1128, "step": 6045 }, { "epoch": 1.42, "learning_rate": 1.1354243008170866e-05, "loss": 0.0212, "step": 6046 }, { "epoch": 1.42, "learning_rate": 1.1351731952002798e-05, "loss": 0.0342, "step": 6047 }, { "epoch": 1.42, "learning_rate": 1.1349220809013246e-05, "loss": 0.026, "step": 6048 }, { "epoch": 1.42, "learning_rate": 1.1346709579363506e-05, "loss": 0.0238, "step": 6049 }, { "epoch": 1.42, "learning_rate": 1.134419826321487e-05, "loss": 0.0035, "step": 6050 }, { "epoch": 1.42, "learning_rate": 1.1341686860728642e-05, "loss": 0.0568, "step": 6051 }, { "epoch": 1.42, "learning_rate": 1.1339175372066125e-05, "loss": 0.1011, "step": 6052 }, { "epoch": 1.42, "learning_rate": 1.1336663797388639e-05, "loss": 0.0374, "step": 6053 }, { "epoch": 1.42, "learning_rate": 1.1334152136857493e-05, "loss": 0.0272, "step": 6054 }, { "epoch": 1.42, "learning_rate": 1.1331640390634014e-05, "loss": 0.034, "step": 6055 }, { "epoch": 1.42, "learning_rate": 1.1329128558879531e-05, "loss": 0.0423, "step": 6056 }, { "epoch": 1.42, "learning_rate": 1.1326616641755383e-05, "loss": 0.0224, "step": 6057 }, { "epoch": 1.42, "learning_rate": 1.1324104639422902e-05, "loss": 0.0234, "step": 6058 }, { "epoch": 1.42, "learning_rate": 1.132159255204344e-05, "loss": 0.0414, "step": 6059 }, { "epoch": 1.42, "learning_rate": 1.1319080379778345e-05, "loss": 0.0466, "step": 6060 }, { "epoch": 1.42, "learning_rate": 1.1316568122788973e-05, "loss": 0.0025, "step": 6061 }, { "epoch": 1.42, "learning_rate": 1.1314055781236686e-05, "loss": 0.0122, "step": 6062 }, { "epoch": 1.42, "learning_rate": 1.1311543355282853e-05, "loss": 0.0685, "step": 6063 }, { "epoch": 1.42, "learning_rate": 1.1309030845088842e-05, "loss": 0.0298, "step": 6064 }, { "epoch": 1.42, "learning_rate": 1.1306518250816036e-05, "loss": 0.0174, "step": 6065 }, { "epoch": 1.42, "learning_rate": 1.1304005572625818e-05, "loss": 0.0018, "step": 6066 }, { "epoch": 1.42, "learning_rate": 1.1301492810679575e-05, "loss": 0.0527, "step": 6067 }, { "epoch": 1.42, "learning_rate": 1.1298979965138699e-05, "loss": 0.0085, "step": 6068 }, { "epoch": 1.42, "learning_rate": 1.1296467036164598e-05, "loss": 0.0544, "step": 6069 }, { "epoch": 1.42, "learning_rate": 1.1293954023918665e-05, "loss": 0.0353, "step": 6070 }, { "epoch": 1.42, "learning_rate": 1.1291440928562321e-05, "loss": 0.0281, "step": 6071 }, { "epoch": 1.43, "learning_rate": 1.1288927750256973e-05, "loss": 0.051, "step": 6072 }, { "epoch": 1.43, "learning_rate": 1.128641448916405e-05, "loss": 0.0969, "step": 6073 }, { "epoch": 1.43, "learning_rate": 1.1283901145444971e-05, "loss": 0.1167, "step": 6074 }, { "epoch": 1.43, "learning_rate": 1.1281387719261174e-05, "loss": 0.0493, "step": 6075 }, { "epoch": 1.43, "learning_rate": 1.1278874210774092e-05, "loss": 0.0306, "step": 6076 }, { "epoch": 1.43, "learning_rate": 1.1276360620145169e-05, "loss": 0.0355, "step": 6077 }, { "epoch": 1.43, "learning_rate": 1.1273846947535847e-05, "loss": 0.01, "step": 6078 }, { "epoch": 1.43, "learning_rate": 1.1271333193107588e-05, "loss": 0.0087, "step": 6079 }, { "epoch": 1.43, "learning_rate": 1.1268819357021841e-05, "loss": 0.0245, "step": 6080 }, { "epoch": 1.43, "learning_rate": 1.1266305439440078e-05, "loss": 0.0136, "step": 6081 }, { "epoch": 1.43, "learning_rate": 1.1263791440523758e-05, "loss": 0.0367, "step": 6082 }, { "epoch": 1.43, "learning_rate": 1.1261277360434361e-05, "loss": 0.067, "step": 6083 }, { "epoch": 1.43, "learning_rate": 1.1258763199333366e-05, "loss": 0.0408, "step": 6084 }, { "epoch": 1.43, "learning_rate": 1.1256248957382254e-05, "loss": 0.1069, "step": 6085 }, { "epoch": 1.43, "learning_rate": 1.1253734634742516e-05, "loss": 0.0718, "step": 6086 }, { "epoch": 1.43, "learning_rate": 1.1251220231575648e-05, "loss": 0.0161, "step": 6087 }, { "epoch": 1.43, "learning_rate": 1.1248705748043146e-05, "loss": 0.0232, "step": 6088 }, { "epoch": 1.43, "learning_rate": 1.1246191184306513e-05, "loss": 0.0479, "step": 6089 }, { "epoch": 1.43, "learning_rate": 1.1243676540527265e-05, "loss": 0.0518, "step": 6090 }, { "epoch": 1.43, "learning_rate": 1.1241161816866917e-05, "loss": 0.0421, "step": 6091 }, { "epoch": 1.43, "learning_rate": 1.1238647013486983e-05, "loss": 0.0117, "step": 6092 }, { "epoch": 1.43, "learning_rate": 1.1236132130548995e-05, "loss": 0.0429, "step": 6093 }, { "epoch": 1.43, "learning_rate": 1.1233617168214479e-05, "loss": 0.0371, "step": 6094 }, { "epoch": 1.43, "learning_rate": 1.1231102126644973e-05, "loss": 0.0087, "step": 6095 }, { "epoch": 1.43, "learning_rate": 1.1228587006002013e-05, "loss": 0.0137, "step": 6096 }, { "epoch": 1.43, "learning_rate": 1.1226071806447152e-05, "loss": 0.0526, "step": 6097 }, { "epoch": 1.43, "learning_rate": 1.1223556528141938e-05, "loss": 0.0427, "step": 6098 }, { "epoch": 1.43, "learning_rate": 1.1221041171247924e-05, "loss": 0.015, "step": 6099 }, { "epoch": 1.43, "learning_rate": 1.1218525735926673e-05, "loss": 0.047, "step": 6100 }, { "epoch": 1.43, "learning_rate": 1.1216010222339753e-05, "loss": 0.0173, "step": 6101 }, { "epoch": 1.43, "learning_rate": 1.1213494630648729e-05, "loss": 0.0073, "step": 6102 }, { "epoch": 1.43, "learning_rate": 1.1210978961015184e-05, "loss": 0.0119, "step": 6103 }, { "epoch": 1.43, "learning_rate": 1.1208463213600693e-05, "loss": 0.0325, "step": 6104 }, { "epoch": 1.43, "learning_rate": 1.120594738856685e-05, "loss": 0.0023, "step": 6105 }, { "epoch": 1.43, "learning_rate": 1.1203431486075232e-05, "loss": 0.0236, "step": 6106 }, { "epoch": 1.43, "learning_rate": 1.1200915506287448e-05, "loss": 0.0915, "step": 6107 }, { "epoch": 1.43, "learning_rate": 1.1198399449365093e-05, "loss": 0.0474, "step": 6108 }, { "epoch": 1.43, "learning_rate": 1.1195883315469778e-05, "loss": 0.0126, "step": 6109 }, { "epoch": 1.43, "learning_rate": 1.1193367104763107e-05, "loss": 0.0462, "step": 6110 }, { "epoch": 1.43, "learning_rate": 1.1190850817406703e-05, "loss": 0.0392, "step": 6111 }, { "epoch": 1.43, "learning_rate": 1.1188334453562178e-05, "loss": 0.0062, "step": 6112 }, { "epoch": 1.43, "learning_rate": 1.1185818013391164e-05, "loss": 0.0056, "step": 6113 }, { "epoch": 1.43, "learning_rate": 1.1183301497055287e-05, "loss": 0.0013, "step": 6114 }, { "epoch": 1.44, "learning_rate": 1.1180784904716185e-05, "loss": 0.0573, "step": 6115 }, { "epoch": 1.44, "learning_rate": 1.1178268236535499e-05, "loss": 0.0124, "step": 6116 }, { "epoch": 1.44, "learning_rate": 1.1175751492674876e-05, "loss": 0.1004, "step": 6117 }, { "epoch": 1.44, "learning_rate": 1.1173234673295959e-05, "loss": 0.0098, "step": 6118 }, { "epoch": 1.44, "learning_rate": 1.117071777856041e-05, "loss": 0.0069, "step": 6119 }, { "epoch": 1.44, "learning_rate": 1.1168200808629884e-05, "loss": 0.0683, "step": 6120 }, { "epoch": 1.44, "learning_rate": 1.1165683763666046e-05, "loss": 0.0374, "step": 6121 }, { "epoch": 1.44, "learning_rate": 1.1163166643830567e-05, "loss": 0.0269, "step": 6122 }, { "epoch": 1.44, "learning_rate": 1.1160649449285122e-05, "loss": 0.0253, "step": 6123 }, { "epoch": 1.44, "learning_rate": 1.1158132180191386e-05, "loss": 0.039, "step": 6124 }, { "epoch": 1.44, "learning_rate": 1.1155614836711048e-05, "loss": 0.0429, "step": 6125 }, { "epoch": 1.44, "learning_rate": 1.1153097419005791e-05, "loss": 0.0582, "step": 6126 }, { "epoch": 1.44, "learning_rate": 1.1150579927237315e-05, "loss": 0.0172, "step": 6127 }, { "epoch": 1.44, "learning_rate": 1.1148062361567312e-05, "loss": 0.0122, "step": 6128 }, { "epoch": 1.44, "learning_rate": 1.1145544722157486e-05, "loss": 0.036, "step": 6129 }, { "epoch": 1.44, "learning_rate": 1.1143027009169545e-05, "loss": 0.0076, "step": 6130 }, { "epoch": 1.44, "learning_rate": 1.11405092227652e-05, "loss": 0.0652, "step": 6131 }, { "epoch": 1.44, "learning_rate": 1.1137991363106168e-05, "loss": 0.0205, "step": 6132 }, { "epoch": 1.44, "learning_rate": 1.1135473430354177e-05, "loss": 0.0255, "step": 6133 }, { "epoch": 1.44, "learning_rate": 1.1132955424670944e-05, "loss": 0.0087, "step": 6134 }, { "epoch": 1.44, "learning_rate": 1.1130437346218207e-05, "loss": 0.0705, "step": 6135 }, { "epoch": 1.44, "learning_rate": 1.11279191951577e-05, "loss": 0.0236, "step": 6136 }, { "epoch": 1.44, "learning_rate": 1.1125400971651159e-05, "loss": 0.0767, "step": 6137 }, { "epoch": 1.44, "learning_rate": 1.1122882675860334e-05, "loss": 0.0528, "step": 6138 }, { "epoch": 1.44, "learning_rate": 1.1120364307946974e-05, "loss": 0.0375, "step": 6139 }, { "epoch": 1.44, "learning_rate": 1.1117845868072828e-05, "loss": 0.0996, "step": 6140 }, { "epoch": 1.44, "learning_rate": 1.1115327356399664e-05, "loss": 0.0969, "step": 6141 }, { "epoch": 1.44, "learning_rate": 1.1112808773089236e-05, "loss": 0.007, "step": 6142 }, { "epoch": 1.44, "learning_rate": 1.1110290118303322e-05, "loss": 0.0332, "step": 6143 }, { "epoch": 1.44, "learning_rate": 1.1107771392203686e-05, "loss": 0.0053, "step": 6144 }, { "epoch": 1.44, "learning_rate": 1.1105252594952109e-05, "loss": 0.0544, "step": 6145 }, { "epoch": 1.44, "learning_rate": 1.1102733726710374e-05, "loss": 0.0383, "step": 6146 }, { "epoch": 1.44, "learning_rate": 1.1100214787640267e-05, "loss": 0.0437, "step": 6147 }, { "epoch": 1.44, "learning_rate": 1.1097695777903575e-05, "loss": 0.0224, "step": 6148 }, { "epoch": 1.44, "learning_rate": 1.1095176697662095e-05, "loss": 0.077, "step": 6149 }, { "epoch": 1.44, "learning_rate": 1.1092657547077632e-05, "loss": 0.025, "step": 6150 }, { "epoch": 1.44, "learning_rate": 1.1090138326311987e-05, "loss": 0.027, "step": 6151 }, { "epoch": 1.44, "learning_rate": 1.1087619035526968e-05, "loss": 0.0225, "step": 6152 }, { "epoch": 1.44, "learning_rate": 1.1085099674884392e-05, "loss": 0.0155, "step": 6153 }, { "epoch": 1.44, "learning_rate": 1.1082580244546072e-05, "loss": 0.0441, "step": 6154 }, { "epoch": 1.44, "learning_rate": 1.1080060744673835e-05, "loss": 0.0285, "step": 6155 }, { "epoch": 1.44, "learning_rate": 1.1077541175429504e-05, "loss": 0.0142, "step": 6156 }, { "epoch": 1.45, "learning_rate": 1.1075021536974916e-05, "loss": 0.0505, "step": 6157 }, { "epoch": 1.45, "learning_rate": 1.10725018294719e-05, "loss": 0.0714, "step": 6158 }, { "epoch": 1.45, "learning_rate": 1.1069982053082304e-05, "loss": 0.0512, "step": 6159 }, { "epoch": 1.45, "learning_rate": 1.1067462207967966e-05, "loss": 0.0806, "step": 6160 }, { "epoch": 1.45, "learning_rate": 1.106494229429074e-05, "loss": 0.0701, "step": 6161 }, { "epoch": 1.45, "learning_rate": 1.1062422312212475e-05, "loss": 0.0328, "step": 6162 }, { "epoch": 1.45, "learning_rate": 1.1059902261895033e-05, "loss": 0.0409, "step": 6163 }, { "epoch": 1.45, "learning_rate": 1.1057382143500272e-05, "loss": 0.013, "step": 6164 }, { "epoch": 1.45, "learning_rate": 1.1054861957190066e-05, "loss": 0.0382, "step": 6165 }, { "epoch": 1.45, "learning_rate": 1.1052341703126274e-05, "loss": 0.0052, "step": 6166 }, { "epoch": 1.45, "learning_rate": 1.1049821381470786e-05, "loss": 0.0152, "step": 6167 }, { "epoch": 1.45, "learning_rate": 1.1047300992385473e-05, "loss": 0.014, "step": 6168 }, { "epoch": 1.45, "learning_rate": 1.104478053603222e-05, "loss": 0.0108, "step": 6169 }, { "epoch": 1.45, "learning_rate": 1.1042260012572916e-05, "loss": 0.0146, "step": 6170 }, { "epoch": 1.45, "learning_rate": 1.1039739422169458e-05, "loss": 0.0309, "step": 6171 }, { "epoch": 1.45, "learning_rate": 1.1037218764983733e-05, "loss": 0.0409, "step": 6172 }, { "epoch": 1.45, "learning_rate": 1.1034698041177652e-05, "loss": 0.036, "step": 6173 }, { "epoch": 1.45, "learning_rate": 1.1032177250913114e-05, "loss": 0.0411, "step": 6174 }, { "epoch": 1.45, "learning_rate": 1.1029656394352033e-05, "loss": 0.0078, "step": 6175 }, { "epoch": 1.45, "learning_rate": 1.1027135471656323e-05, "loss": 0.0227, "step": 6176 }, { "epoch": 1.45, "learning_rate": 1.1024614482987901e-05, "loss": 0.0245, "step": 6177 }, { "epoch": 1.45, "learning_rate": 1.102209342850869e-05, "loss": 0.0127, "step": 6178 }, { "epoch": 1.45, "learning_rate": 1.1019572308380617e-05, "loss": 0.0231, "step": 6179 }, { "epoch": 1.45, "learning_rate": 1.1017051122765609e-05, "loss": 0.0873, "step": 6180 }, { "epoch": 1.45, "learning_rate": 1.101452987182561e-05, "loss": 0.0244, "step": 6181 }, { "epoch": 1.45, "learning_rate": 1.1012008555722553e-05, "loss": 0.023, "step": 6182 }, { "epoch": 1.45, "learning_rate": 1.1009487174618381e-05, "loss": 0.0189, "step": 6183 }, { "epoch": 1.45, "learning_rate": 1.1006965728675045e-05, "loss": 0.0234, "step": 6184 }, { "epoch": 1.45, "learning_rate": 1.1004444218054498e-05, "loss": 0.1104, "step": 6185 }, { "epoch": 1.45, "learning_rate": 1.1001922642918692e-05, "loss": 0.0658, "step": 6186 }, { "epoch": 1.45, "learning_rate": 1.099940100342959e-05, "loss": 0.0096, "step": 6187 }, { "epoch": 1.45, "learning_rate": 1.0996879299749157e-05, "loss": 0.0472, "step": 6188 }, { "epoch": 1.45, "learning_rate": 1.099435753203936e-05, "loss": 0.1181, "step": 6189 }, { "epoch": 1.45, "learning_rate": 1.099183570046217e-05, "loss": 0.0436, "step": 6190 }, { "epoch": 1.45, "learning_rate": 1.098931380517957e-05, "loss": 0.0148, "step": 6191 }, { "epoch": 1.45, "learning_rate": 1.098679184635353e-05, "loss": 0.0636, "step": 6192 }, { "epoch": 1.45, "learning_rate": 1.0984269824146045e-05, "loss": 0.0415, "step": 6193 }, { "epoch": 1.45, "learning_rate": 1.09817477387191e-05, "loss": 0.0353, "step": 6194 }, { "epoch": 1.45, "learning_rate": 1.097922559023469e-05, "loss": 0.0145, "step": 6195 }, { "epoch": 1.45, "learning_rate": 1.0976703378854807e-05, "loss": 0.0248, "step": 6196 }, { "epoch": 1.45, "learning_rate": 1.0974181104741462e-05, "loss": 0.0835, "step": 6197 }, { "epoch": 1.45, "learning_rate": 1.0971658768056648e-05, "loss": 0.0248, "step": 6198 }, { "epoch": 1.45, "learning_rate": 1.0969136368962383e-05, "loss": 0.038, "step": 6199 }, { "epoch": 1.46, "learning_rate": 1.0966613907620679e-05, "loss": 0.0951, "step": 6200 }, { "epoch": 1.46, "learning_rate": 1.0964091384193548e-05, "loss": 0.0059, "step": 6201 }, { "epoch": 1.46, "learning_rate": 1.0961568798843016e-05, "loss": 0.0176, "step": 6202 }, { "epoch": 1.46, "learning_rate": 1.0959046151731108e-05, "loss": 0.0081, "step": 6203 }, { "epoch": 1.46, "learning_rate": 1.0956523443019848e-05, "loss": 0.0395, "step": 6204 }, { "epoch": 1.46, "learning_rate": 1.095400067287128e-05, "loss": 0.0035, "step": 6205 }, { "epoch": 1.46, "learning_rate": 1.095147784144743e-05, "loss": 0.0107, "step": 6206 }, { "epoch": 1.46, "learning_rate": 1.0948954948910344e-05, "loss": 0.0086, "step": 6207 }, { "epoch": 1.46, "learning_rate": 1.0946431995422065e-05, "loss": 0.035, "step": 6208 }, { "epoch": 1.46, "learning_rate": 1.0943908981144642e-05, "loss": 0.0252, "step": 6209 }, { "epoch": 1.46, "learning_rate": 1.0941385906240132e-05, "loss": 0.0091, "step": 6210 }, { "epoch": 1.46, "learning_rate": 1.0938862770870585e-05, "loss": 0.0106, "step": 6211 }, { "epoch": 1.46, "learning_rate": 1.0936339575198066e-05, "loss": 0.0133, "step": 6212 }, { "epoch": 1.46, "learning_rate": 1.0933816319384637e-05, "loss": 0.0191, "step": 6213 }, { "epoch": 1.46, "learning_rate": 1.0931293003592369e-05, "loss": 0.0349, "step": 6214 }, { "epoch": 1.46, "learning_rate": 1.0928769627983334e-05, "loss": 0.0176, "step": 6215 }, { "epoch": 1.46, "learning_rate": 1.0926246192719602e-05, "loss": 0.0729, "step": 6216 }, { "epoch": 1.46, "learning_rate": 1.0923722697963262e-05, "loss": 0.0326, "step": 6217 }, { "epoch": 1.46, "learning_rate": 1.0921199143876388e-05, "loss": 0.0072, "step": 6218 }, { "epoch": 1.46, "learning_rate": 1.0918675530621072e-05, "loss": 0.0123, "step": 6219 }, { "epoch": 1.46, "learning_rate": 1.0916151858359409e-05, "loss": 0.0354, "step": 6220 }, { "epoch": 1.46, "learning_rate": 1.0913628127253488e-05, "loss": 0.0029, "step": 6221 }, { "epoch": 1.46, "learning_rate": 1.0911104337465408e-05, "loss": 0.0773, "step": 6222 }, { "epoch": 1.46, "learning_rate": 1.090858048915728e-05, "loss": 0.0032, "step": 6223 }, { "epoch": 1.46, "learning_rate": 1.0906056582491195e-05, "loss": 0.0019, "step": 6224 }, { "epoch": 1.46, "learning_rate": 1.090353261762928e-05, "loss": 0.1027, "step": 6225 }, { "epoch": 1.46, "learning_rate": 1.0901008594733635e-05, "loss": 0.0969, "step": 6226 }, { "epoch": 1.46, "learning_rate": 1.0898484513966382e-05, "loss": 0.0259, "step": 6227 }, { "epoch": 1.46, "learning_rate": 1.0895960375489649e-05, "loss": 0.067, "step": 6228 }, { "epoch": 1.46, "learning_rate": 1.0893436179465551e-05, "loss": 0.0621, "step": 6229 }, { "epoch": 1.46, "learning_rate": 1.0890911926056223e-05, "loss": 0.0255, "step": 6230 }, { "epoch": 1.46, "learning_rate": 1.0888387615423794e-05, "loss": 0.0079, "step": 6231 }, { "epoch": 1.46, "learning_rate": 1.0885863247730402e-05, "loss": 0.1067, "step": 6232 }, { "epoch": 1.46, "learning_rate": 1.0883338823138185e-05, "loss": 0.0146, "step": 6233 }, { "epoch": 1.46, "learning_rate": 1.0880814341809288e-05, "loss": 0.0711, "step": 6234 }, { "epoch": 1.46, "learning_rate": 1.0878289803905855e-05, "loss": 0.058, "step": 6235 }, { "epoch": 1.46, "learning_rate": 1.0875765209590042e-05, "loss": 0.0245, "step": 6236 }, { "epoch": 1.46, "learning_rate": 1.0873240559023994e-05, "loss": 0.0165, "step": 6237 }, { "epoch": 1.46, "learning_rate": 1.0870715852369882e-05, "loss": 0.0428, "step": 6238 }, { "epoch": 1.46, "learning_rate": 1.0868191089789855e-05, "loss": 0.0024, "step": 6239 }, { "epoch": 1.46, "learning_rate": 1.0865666271446082e-05, "loss": 0.0569, "step": 6240 }, { "epoch": 1.46, "learning_rate": 1.0863141397500736e-05, "loss": 0.0436, "step": 6241 }, { "epoch": 1.46, "learning_rate": 1.0860616468115985e-05, "loss": 0.0235, "step": 6242 }, { "epoch": 1.47, "learning_rate": 1.0858091483454003e-05, "loss": 0.0308, "step": 6243 }, { "epoch": 1.47, "learning_rate": 1.0855566443676978e-05, "loss": 0.0389, "step": 6244 }, { "epoch": 1.47, "learning_rate": 1.0853041348947079e-05, "loss": 0.0446, "step": 6245 }, { "epoch": 1.47, "learning_rate": 1.0850516199426507e-05, "loss": 0.0503, "step": 6246 }, { "epoch": 1.47, "learning_rate": 1.084799099527744e-05, "loss": 0.0249, "step": 6247 }, { "epoch": 1.47, "learning_rate": 1.0845465736662075e-05, "loss": 0.0161, "step": 6248 }, { "epoch": 1.47, "learning_rate": 1.0842940423742615e-05, "loss": 0.0077, "step": 6249 }, { "epoch": 1.47, "learning_rate": 1.0840415056681252e-05, "loss": 0.0446, "step": 6250 }, { "epoch": 1.47, "learning_rate": 1.0837889635640195e-05, "loss": 0.0048, "step": 6251 }, { "epoch": 1.47, "learning_rate": 1.0835364160781649e-05, "loss": 0.0098, "step": 6252 }, { "epoch": 1.47, "learning_rate": 1.0832838632267824e-05, "loss": 0.1104, "step": 6253 }, { "epoch": 1.47, "learning_rate": 1.0830313050260937e-05, "loss": 0.0085, "step": 6254 }, { "epoch": 1.47, "learning_rate": 1.0827787414923203e-05, "loss": 0.0405, "step": 6255 }, { "epoch": 1.47, "learning_rate": 1.0825261726416848e-05, "loss": 0.0712, "step": 6256 }, { "epoch": 1.47, "learning_rate": 1.0822735984904086e-05, "loss": 0.0075, "step": 6257 }, { "epoch": 1.47, "learning_rate": 1.0820210190547155e-05, "loss": 0.0098, "step": 6258 }, { "epoch": 1.47, "learning_rate": 1.0817684343508285e-05, "loss": 0.0249, "step": 6259 }, { "epoch": 1.47, "learning_rate": 1.0815158443949705e-05, "loss": 0.029, "step": 6260 }, { "epoch": 1.47, "learning_rate": 1.081263249203366e-05, "loss": 0.0339, "step": 6261 }, { "epoch": 1.47, "learning_rate": 1.0810106487922385e-05, "loss": 0.0086, "step": 6262 }, { "epoch": 1.47, "learning_rate": 1.0807580431778128e-05, "loss": 0.0418, "step": 6263 }, { "epoch": 1.47, "learning_rate": 1.080505432376314e-05, "loss": 0.0315, "step": 6264 }, { "epoch": 1.47, "learning_rate": 1.0802528164039665e-05, "loss": 0.0235, "step": 6265 }, { "epoch": 1.47, "learning_rate": 1.0800001952769965e-05, "loss": 0.0286, "step": 6266 }, { "epoch": 1.47, "learning_rate": 1.0797475690116295e-05, "loss": 0.0964, "step": 6267 }, { "epoch": 1.47, "learning_rate": 1.0794949376240913e-05, "loss": 0.012, "step": 6268 }, { "epoch": 1.47, "learning_rate": 1.0792423011306087e-05, "loss": 0.0215, "step": 6269 }, { "epoch": 1.47, "learning_rate": 1.0789896595474091e-05, "loss": 0.0438, "step": 6270 }, { "epoch": 1.47, "learning_rate": 1.0787370128907187e-05, "loss": 0.0379, "step": 6271 }, { "epoch": 1.47, "learning_rate": 1.0784843611767652e-05, "loss": 0.0105, "step": 6272 }, { "epoch": 1.47, "learning_rate": 1.0782317044217763e-05, "loss": 0.0216, "step": 6273 }, { "epoch": 1.47, "learning_rate": 1.0779790426419807e-05, "loss": 0.0321, "step": 6274 }, { "epoch": 1.47, "learning_rate": 1.0777263758536058e-05, "loss": 0.0303, "step": 6275 }, { "epoch": 1.47, "learning_rate": 1.0774737040728812e-05, "loss": 0.0309, "step": 6276 }, { "epoch": 1.47, "learning_rate": 1.0772210273160355e-05, "loss": 0.0489, "step": 6277 }, { "epoch": 1.47, "learning_rate": 1.0769683455992984e-05, "loss": 0.0197, "step": 6278 }, { "epoch": 1.47, "learning_rate": 1.0767156589388991e-05, "loss": 0.0182, "step": 6279 }, { "epoch": 1.47, "learning_rate": 1.0764629673510682e-05, "loss": 0.1004, "step": 6280 }, { "epoch": 1.47, "learning_rate": 1.0762102708520357e-05, "loss": 0.0123, "step": 6281 }, { "epoch": 1.47, "learning_rate": 1.0759575694580325e-05, "loss": 0.0228, "step": 6282 }, { "epoch": 1.47, "learning_rate": 1.075704863185289e-05, "loss": 0.0781, "step": 6283 }, { "epoch": 1.47, "learning_rate": 1.0754521520500371e-05, "loss": 0.0279, "step": 6284 }, { "epoch": 1.48, "learning_rate": 1.075199436068508e-05, "loss": 0.0152, "step": 6285 }, { "epoch": 1.48, "learning_rate": 1.0749467152569339e-05, "loss": 0.0734, "step": 6286 }, { "epoch": 1.48, "learning_rate": 1.0746939896315468e-05, "loss": 0.0039, "step": 6287 }, { "epoch": 1.48, "learning_rate": 1.0744412592085793e-05, "loss": 0.0107, "step": 6288 }, { "epoch": 1.48, "learning_rate": 1.0741885240042639e-05, "loss": 0.008, "step": 6289 }, { "epoch": 1.48, "learning_rate": 1.0739357840348346e-05, "loss": 0.0089, "step": 6290 }, { "epoch": 1.48, "learning_rate": 1.0736830393165236e-05, "loss": 0.0992, "step": 6291 }, { "epoch": 1.48, "learning_rate": 1.0734302898655657e-05, "loss": 0.0211, "step": 6292 }, { "epoch": 1.48, "learning_rate": 1.0731775356981945e-05, "loss": 0.0343, "step": 6293 }, { "epoch": 1.48, "learning_rate": 1.0729247768306443e-05, "loss": 0.0055, "step": 6294 }, { "epoch": 1.48, "learning_rate": 1.0726720132791498e-05, "loss": 0.0347, "step": 6295 }, { "epoch": 1.48, "learning_rate": 1.0724192450599462e-05, "loss": 0.0089, "step": 6296 }, { "epoch": 1.48, "learning_rate": 1.0721664721892683e-05, "loss": 0.0038, "step": 6297 }, { "epoch": 1.48, "learning_rate": 1.0719136946833524e-05, "loss": 0.0443, "step": 6298 }, { "epoch": 1.48, "learning_rate": 1.0716609125584337e-05, "loss": 0.0493, "step": 6299 }, { "epoch": 1.48, "learning_rate": 1.0714081258307482e-05, "loss": 0.027, "step": 6300 }, { "epoch": 1.48, "learning_rate": 1.0711553345165329e-05, "loss": 0.08, "step": 6301 }, { "epoch": 1.48, "learning_rate": 1.0709025386320244e-05, "loss": 0.0622, "step": 6302 }, { "epoch": 1.48, "learning_rate": 1.0706497381934593e-05, "loss": 0.0091, "step": 6303 }, { "epoch": 1.48, "learning_rate": 1.0703969332170753e-05, "loss": 0.0361, "step": 6304 }, { "epoch": 1.48, "learning_rate": 1.0701441237191102e-05, "loss": 0.0021, "step": 6305 }, { "epoch": 1.48, "learning_rate": 1.0698913097158015e-05, "loss": 0.0257, "step": 6306 }, { "epoch": 1.48, "learning_rate": 1.0696384912233875e-05, "loss": 0.0529, "step": 6307 }, { "epoch": 1.48, "learning_rate": 1.069385668258107e-05, "loss": 0.0759, "step": 6308 }, { "epoch": 1.48, "learning_rate": 1.0691328408361982e-05, "loss": 0.077, "step": 6309 }, { "epoch": 1.48, "learning_rate": 1.0688800089739009e-05, "loss": 0.0242, "step": 6310 }, { "epoch": 1.48, "learning_rate": 1.0686271726874536e-05, "loss": 0.1349, "step": 6311 }, { "epoch": 1.48, "learning_rate": 1.0683743319930963e-05, "loss": 0.0107, "step": 6312 }, { "epoch": 1.48, "learning_rate": 1.068121486907069e-05, "loss": 0.0206, "step": 6313 }, { "epoch": 1.48, "learning_rate": 1.0678686374456122e-05, "loss": 0.0792, "step": 6314 }, { "epoch": 1.48, "learning_rate": 1.0676157836249657e-05, "loss": 0.0108, "step": 6315 }, { "epoch": 1.48, "learning_rate": 1.0673629254613706e-05, "loss": 0.05, "step": 6316 }, { "epoch": 1.48, "learning_rate": 1.067110062971068e-05, "loss": 0.0376, "step": 6317 }, { "epoch": 1.48, "learning_rate": 1.066857196170299e-05, "loss": 0.0674, "step": 6318 }, { "epoch": 1.48, "learning_rate": 1.0666043250753054e-05, "loss": 0.0466, "step": 6319 }, { "epoch": 1.48, "learning_rate": 1.066351449702329e-05, "loss": 0.0233, "step": 6320 }, { "epoch": 1.48, "learning_rate": 1.0660985700676112e-05, "loss": 0.0631, "step": 6321 }, { "epoch": 1.48, "learning_rate": 1.0658456861873958e-05, "loss": 0.0606, "step": 6322 }, { "epoch": 1.48, "learning_rate": 1.0655927980779244e-05, "loss": 0.048, "step": 6323 }, { "epoch": 1.48, "learning_rate": 1.0653399057554407e-05, "loss": 0.0534, "step": 6324 }, { "epoch": 1.48, "learning_rate": 1.0650870092361871e-05, "loss": 0.0249, "step": 6325 }, { "epoch": 1.48, "learning_rate": 1.064834108536408e-05, "loss": 0.0377, "step": 6326 }, { "epoch": 1.48, "learning_rate": 1.0645812036723464e-05, "loss": 0.0118, "step": 6327 }, { "epoch": 1.49, "learning_rate": 1.0643282946602469e-05, "loss": 0.0836, "step": 6328 }, { "epoch": 1.49, "learning_rate": 1.0640753815163532e-05, "loss": 0.0253, "step": 6329 }, { "epoch": 1.49, "learning_rate": 1.0638224642569104e-05, "loss": 0.0323, "step": 6330 }, { "epoch": 1.49, "learning_rate": 1.0635695428981632e-05, "loss": 0.0664, "step": 6331 }, { "epoch": 1.49, "learning_rate": 1.0633166174563565e-05, "loss": 0.0341, "step": 6332 }, { "epoch": 1.49, "learning_rate": 1.0630636879477356e-05, "loss": 0.0079, "step": 6333 }, { "epoch": 1.49, "learning_rate": 1.0628107543885466e-05, "loss": 0.0207, "step": 6334 }, { "epoch": 1.49, "learning_rate": 1.062557816795035e-05, "loss": 0.0338, "step": 6335 }, { "epoch": 1.49, "learning_rate": 1.062304875183447e-05, "loss": 0.0137, "step": 6336 }, { "epoch": 1.49, "learning_rate": 1.0620519295700287e-05, "loss": 0.0175, "step": 6337 }, { "epoch": 1.49, "learning_rate": 1.061798979971027e-05, "loss": 0.0075, "step": 6338 }, { "epoch": 1.49, "learning_rate": 1.0615460264026891e-05, "loss": 0.0608, "step": 6339 }, { "epoch": 1.49, "learning_rate": 1.0612930688812619e-05, "loss": 0.0429, "step": 6340 }, { "epoch": 1.49, "learning_rate": 1.0610401074229927e-05, "loss": 0.004, "step": 6341 }, { "epoch": 1.49, "learning_rate": 1.0607871420441293e-05, "loss": 0.0225, "step": 6342 }, { "epoch": 1.49, "learning_rate": 1.0605341727609195e-05, "loss": 0.0581, "step": 6343 }, { "epoch": 1.49, "learning_rate": 1.0602811995896118e-05, "loss": 0.0554, "step": 6344 }, { "epoch": 1.49, "learning_rate": 1.060028222546454e-05, "loss": 0.0254, "step": 6345 }, { "epoch": 1.49, "learning_rate": 1.0597752416476955e-05, "loss": 0.0446, "step": 6346 }, { "epoch": 1.49, "learning_rate": 1.0595222569095845e-05, "loss": 0.0035, "step": 6347 }, { "epoch": 1.49, "learning_rate": 1.0592692683483707e-05, "loss": 0.0131, "step": 6348 }, { "epoch": 1.49, "learning_rate": 1.0590162759803031e-05, "loss": 0.0291, "step": 6349 }, { "epoch": 1.49, "learning_rate": 1.0587632798216318e-05, "loss": 0.0192, "step": 6350 }, { "epoch": 1.49, "learning_rate": 1.0585102798886065e-05, "loss": 0.0253, "step": 6351 }, { "epoch": 1.49, "learning_rate": 1.0582572761974772e-05, "loss": 0.017, "step": 6352 }, { "epoch": 1.49, "learning_rate": 1.0580042687644942e-05, "loss": 0.0203, "step": 6353 }, { "epoch": 1.49, "learning_rate": 1.0577512576059085e-05, "loss": 0.0532, "step": 6354 }, { "epoch": 1.49, "learning_rate": 1.0574982427379705e-05, "loss": 0.0646, "step": 6355 }, { "epoch": 1.49, "learning_rate": 1.057245224176932e-05, "loss": 0.0381, "step": 6356 }, { "epoch": 1.49, "learning_rate": 1.0569922019390434e-05, "loss": 0.0366, "step": 6357 }, { "epoch": 1.49, "learning_rate": 1.0567391760405572e-05, "loss": 0.0354, "step": 6358 }, { "epoch": 1.49, "learning_rate": 1.0564861464977243e-05, "loss": 0.0244, "step": 6359 }, { "epoch": 1.49, "learning_rate": 1.0562331133267976e-05, "loss": 0.049, "step": 6360 }, { "epoch": 1.49, "learning_rate": 1.0559800765440286e-05, "loss": 0.0228, "step": 6361 }, { "epoch": 1.49, "learning_rate": 1.0557270361656705e-05, "loss": 0.0318, "step": 6362 }, { "epoch": 1.49, "learning_rate": 1.0554739922079755e-05, "loss": 0.0686, "step": 6363 }, { "epoch": 1.49, "learning_rate": 1.0552209446871965e-05, "loss": 0.056, "step": 6364 }, { "epoch": 1.49, "learning_rate": 1.0549678936195874e-05, "loss": 0.0416, "step": 6365 }, { "epoch": 1.49, "learning_rate": 1.054714839021401e-05, "loss": 0.047, "step": 6366 }, { "epoch": 1.49, "learning_rate": 1.0544617809088911e-05, "loss": 0.0327, "step": 6367 }, { "epoch": 1.49, "learning_rate": 1.0542087192983118e-05, "loss": 0.0349, "step": 6368 }, { "epoch": 1.49, "learning_rate": 1.0539556542059166e-05, "loss": 0.0258, "step": 6369 }, { "epoch": 1.49, "learning_rate": 1.0537025856479605e-05, "loss": 0.0039, "step": 6370 }, { "epoch": 1.5, "learning_rate": 1.0534495136406977e-05, "loss": 0.0049, "step": 6371 }, { "epoch": 1.5, "learning_rate": 1.053196438200383e-05, "loss": 0.0278, "step": 6372 }, { "epoch": 1.5, "learning_rate": 1.0529433593432712e-05, "loss": 0.025, "step": 6373 }, { "epoch": 1.5, "learning_rate": 1.0526902770856182e-05, "loss": 0.0605, "step": 6374 }, { "epoch": 1.5, "learning_rate": 1.0524371914436786e-05, "loss": 0.0106, "step": 6375 }, { "epoch": 1.5, "learning_rate": 1.0521841024337086e-05, "loss": 0.0139, "step": 6376 }, { "epoch": 1.5, "learning_rate": 1.0519310100719638e-05, "loss": 0.034, "step": 6377 }, { "epoch": 1.5, "learning_rate": 1.0516779143747002e-05, "loss": 0.0419, "step": 6378 }, { "epoch": 1.5, "learning_rate": 1.0514248153581744e-05, "loss": 0.0327, "step": 6379 }, { "epoch": 1.5, "learning_rate": 1.0511717130386426e-05, "loss": 0.0046, "step": 6380 }, { "epoch": 1.5, "learning_rate": 1.0509186074323612e-05, "loss": 0.0122, "step": 6381 }, { "epoch": 1.5, "learning_rate": 1.050665498555588e-05, "loss": 0.0312, "step": 6382 }, { "epoch": 1.5, "learning_rate": 1.0504123864245795e-05, "loss": 0.0218, "step": 6383 }, { "epoch": 1.5, "learning_rate": 1.0501592710555935e-05, "loss": 0.0479, "step": 6384 }, { "epoch": 1.5, "learning_rate": 1.0499061524648868e-05, "loss": 0.0112, "step": 6385 }, { "epoch": 1.5, "learning_rate": 1.049653030668718e-05, "loss": 0.0398, "step": 6386 }, { "epoch": 1.5, "learning_rate": 1.0493999056833446e-05, "loss": 0.0371, "step": 6387 }, { "epoch": 1.5, "learning_rate": 1.049146777525025e-05, "loss": 0.0491, "step": 6388 }, { "epoch": 1.5, "learning_rate": 1.048893646210017e-05, "loss": 0.0283, "step": 6389 }, { "epoch": 1.5, "learning_rate": 1.04864051175458e-05, "loss": 0.0239, "step": 6390 }, { "epoch": 1.5, "learning_rate": 1.048387374174972e-05, "loss": 0.0087, "step": 6391 }, { "epoch": 1.5, "learning_rate": 1.0481342334874529e-05, "loss": 0.0054, "step": 6392 }, { "epoch": 1.5, "learning_rate": 1.0478810897082809e-05, "loss": 0.0857, "step": 6393 }, { "epoch": 1.5, "learning_rate": 1.0476279428537162e-05, "loss": 0.027, "step": 6394 }, { "epoch": 1.5, "learning_rate": 1.0473747929400177e-05, "loss": 0.0095, "step": 6395 }, { "epoch": 1.5, "learning_rate": 1.0471216399834457e-05, "loss": 0.0447, "step": 6396 }, { "epoch": 1.5, "learning_rate": 1.0468684840002596e-05, "loss": 0.0038, "step": 6397 }, { "epoch": 1.5, "learning_rate": 1.0466153250067199e-05, "loss": 0.017, "step": 6398 }, { "epoch": 1.5, "learning_rate": 1.046362163019087e-05, "loss": 0.0405, "step": 6399 }, { "epoch": 1.5, "learning_rate": 1.0461089980536214e-05, "loss": 0.0659, "step": 6400 }, { "epoch": 1.5, "learning_rate": 1.045855830126584e-05, "loss": 0.0278, "step": 6401 }, { "epoch": 1.5, "learning_rate": 1.0456026592542355e-05, "loss": 0.0025, "step": 6402 }, { "epoch": 1.5, "learning_rate": 1.0453494854528368e-05, "loss": 0.0016, "step": 6403 }, { "epoch": 1.5, "learning_rate": 1.0450963087386496e-05, "loss": 0.0032, "step": 6404 }, { "epoch": 1.5, "learning_rate": 1.0448431291279353e-05, "loss": 0.0133, "step": 6405 }, { "epoch": 1.5, "learning_rate": 1.0445899466369556e-05, "loss": 0.0126, "step": 6406 }, { "epoch": 1.5, "learning_rate": 1.044336761281972e-05, "loss": 0.035, "step": 6407 }, { "epoch": 1.5, "learning_rate": 1.0440835730792472e-05, "loss": 0.0272, "step": 6408 }, { "epoch": 1.5, "learning_rate": 1.0438303820450425e-05, "loss": 0.0094, "step": 6409 }, { "epoch": 1.5, "learning_rate": 1.0435771881956216e-05, "loss": 0.0632, "step": 6410 }, { "epoch": 1.5, "learning_rate": 1.043323991547246e-05, "loss": 0.0339, "step": 6411 }, { "epoch": 1.5, "learning_rate": 1.0430707921161792e-05, "loss": 0.0124, "step": 6412 }, { "epoch": 1.51, "learning_rate": 1.0428175899186832e-05, "loss": 0.0092, "step": 6413 }, { "epoch": 1.51, "learning_rate": 1.042564384971022e-05, "loss": 0.0278, "step": 6414 }, { "epoch": 1.51, "learning_rate": 1.0423111772894585e-05, "loss": 0.0199, "step": 6415 }, { "epoch": 1.51, "learning_rate": 1.0420579668902566e-05, "loss": 0.0372, "step": 6416 }, { "epoch": 1.51, "learning_rate": 1.0418047537896794e-05, "loss": 0.0015, "step": 6417 }, { "epoch": 1.51, "learning_rate": 1.041551538003991e-05, "loss": 0.0031, "step": 6418 }, { "epoch": 1.51, "learning_rate": 1.0412983195494557e-05, "loss": 0.0146, "step": 6419 }, { "epoch": 1.51, "learning_rate": 1.0410450984423373e-05, "loss": 0.002, "step": 6420 }, { "epoch": 1.51, "learning_rate": 1.0407918746988998e-05, "loss": 0.0719, "step": 6421 }, { "epoch": 1.51, "learning_rate": 1.0405386483354085e-05, "loss": 0.0349, "step": 6422 }, { "epoch": 1.51, "learning_rate": 1.0402854193681276e-05, "loss": 0.0199, "step": 6423 }, { "epoch": 1.51, "learning_rate": 1.0400321878133217e-05, "loss": 0.0453, "step": 6424 }, { "epoch": 1.51, "learning_rate": 1.0397789536872564e-05, "loss": 0.0152, "step": 6425 }, { "epoch": 1.51, "learning_rate": 1.039525717006197e-05, "loss": 0.0104, "step": 6426 }, { "epoch": 1.51, "learning_rate": 1.0392724777864081e-05, "loss": 0.0021, "step": 6427 }, { "epoch": 1.51, "learning_rate": 1.0390192360441558e-05, "loss": 0.0062, "step": 6428 }, { "epoch": 1.51, "learning_rate": 1.0387659917957052e-05, "loss": 0.0128, "step": 6429 }, { "epoch": 1.51, "learning_rate": 1.0385127450573229e-05, "loss": 0.0077, "step": 6430 }, { "epoch": 1.51, "learning_rate": 1.038259495845274e-05, "loss": 0.0225, "step": 6431 }, { "epoch": 1.51, "learning_rate": 1.0380062441758253e-05, "loss": 0.0214, "step": 6432 }, { "epoch": 1.51, "learning_rate": 1.037752990065243e-05, "loss": 0.0435, "step": 6433 }, { "epoch": 1.51, "learning_rate": 1.0374997335297936e-05, "loss": 0.0173, "step": 6434 }, { "epoch": 1.51, "learning_rate": 1.0372464745857434e-05, "loss": 0.0283, "step": 6435 }, { "epoch": 1.51, "learning_rate": 1.0369932132493596e-05, "loss": 0.0605, "step": 6436 }, { "epoch": 1.51, "learning_rate": 1.0367399495369087e-05, "loss": 0.0721, "step": 6437 }, { "epoch": 1.51, "learning_rate": 1.0364866834646581e-05, "loss": 0.0433, "step": 6438 }, { "epoch": 1.51, "learning_rate": 1.036233415048875e-05, "loss": 0.0339, "step": 6439 }, { "epoch": 1.51, "learning_rate": 1.0359801443058265e-05, "loss": 0.0181, "step": 6440 }, { "epoch": 1.51, "learning_rate": 1.03572687125178e-05, "loss": 0.0874, "step": 6441 }, { "epoch": 1.51, "learning_rate": 1.0354735959030042e-05, "loss": 0.01, "step": 6442 }, { "epoch": 1.51, "learning_rate": 1.0352203182757657e-05, "loss": 0.019, "step": 6443 }, { "epoch": 1.51, "learning_rate": 1.0349670383863332e-05, "loss": 0.0109, "step": 6444 }, { "epoch": 1.51, "learning_rate": 1.0347137562509748e-05, "loss": 0.0233, "step": 6445 }, { "epoch": 1.51, "learning_rate": 1.0344604718859585e-05, "loss": 0.0035, "step": 6446 }, { "epoch": 1.51, "learning_rate": 1.0342071853075526e-05, "loss": 0.0258, "step": 6447 }, { "epoch": 1.51, "learning_rate": 1.0339538965320262e-05, "loss": 0.0364, "step": 6448 }, { "epoch": 1.51, "learning_rate": 1.0337006055756468e-05, "loss": 0.0055, "step": 6449 }, { "epoch": 1.51, "learning_rate": 1.0334473124546849e-05, "loss": 0.075, "step": 6450 }, { "epoch": 1.51, "learning_rate": 1.0331940171854082e-05, "loss": 0.0233, "step": 6451 }, { "epoch": 1.51, "learning_rate": 1.0329407197840866e-05, "loss": 0.0045, "step": 6452 }, { "epoch": 1.51, "learning_rate": 1.0326874202669885e-05, "loss": 0.0217, "step": 6453 }, { "epoch": 1.51, "learning_rate": 1.0324341186503842e-05, "loss": 0.0591, "step": 6454 }, { "epoch": 1.51, "learning_rate": 1.0321808149505424e-05, "loss": 0.0224, "step": 6455 }, { "epoch": 1.52, "learning_rate": 1.0319275091837334e-05, "loss": 0.0145, "step": 6456 }, { "epoch": 1.52, "learning_rate": 1.0316742013662264e-05, "loss": 0.0222, "step": 6457 }, { "epoch": 1.52, "learning_rate": 1.0314208915142918e-05, "loss": 0.0089, "step": 6458 }, { "epoch": 1.52, "learning_rate": 1.0311675796441993e-05, "loss": 0.0127, "step": 6459 }, { "epoch": 1.52, "learning_rate": 1.0309142657722192e-05, "loss": 0.0371, "step": 6460 }, { "epoch": 1.52, "learning_rate": 1.0306609499146221e-05, "loss": 0.0042, "step": 6461 }, { "epoch": 1.52, "learning_rate": 1.0304076320876781e-05, "loss": 0.0337, "step": 6462 }, { "epoch": 1.52, "learning_rate": 1.0301543123076576e-05, "loss": 0.0011, "step": 6463 }, { "epoch": 1.52, "learning_rate": 1.0299009905908317e-05, "loss": 0.0444, "step": 6464 }, { "epoch": 1.52, "learning_rate": 1.0296476669534708e-05, "loss": 0.0006, "step": 6465 }, { "epoch": 1.52, "learning_rate": 1.0293943414118461e-05, "loss": 0.0657, "step": 6466 }, { "epoch": 1.52, "learning_rate": 1.0291410139822282e-05, "loss": 0.0145, "step": 6467 }, { "epoch": 1.52, "learning_rate": 1.028887684680889e-05, "loss": 0.021, "step": 6468 }, { "epoch": 1.52, "learning_rate": 1.0286343535240993e-05, "loss": 0.0918, "step": 6469 }, { "epoch": 1.52, "learning_rate": 1.0283810205281308e-05, "loss": 0.001, "step": 6470 }, { "epoch": 1.52, "learning_rate": 1.0281276857092548e-05, "loss": 0.0148, "step": 6471 }, { "epoch": 1.52, "learning_rate": 1.027874349083743e-05, "loss": 0.0008, "step": 6472 }, { "epoch": 1.52, "learning_rate": 1.0276210106678672e-05, "loss": 0.0191, "step": 6473 }, { "epoch": 1.52, "learning_rate": 1.0273676704778994e-05, "loss": 0.0223, "step": 6474 }, { "epoch": 1.52, "learning_rate": 1.0271143285301108e-05, "loss": 0.075, "step": 6475 }, { "epoch": 1.52, "learning_rate": 1.026860984840775e-05, "loss": 0.0376, "step": 6476 }, { "epoch": 1.52, "learning_rate": 1.0266076394261629e-05, "loss": 0.0991, "step": 6477 }, { "epoch": 1.52, "learning_rate": 1.0263542923025473e-05, "loss": 0.0347, "step": 6478 }, { "epoch": 1.52, "learning_rate": 1.0261009434862006e-05, "loss": 0.0107, "step": 6479 }, { "epoch": 1.52, "learning_rate": 1.0258475929933958e-05, "loss": 0.0227, "step": 6480 }, { "epoch": 1.52, "learning_rate": 1.0255942408404047e-05, "loss": 0.016, "step": 6481 }, { "epoch": 1.52, "learning_rate": 1.0253408870435007e-05, "loss": 0.0956, "step": 6482 }, { "epoch": 1.52, "learning_rate": 1.0250875316189561e-05, "loss": 0.008, "step": 6483 }, { "epoch": 1.52, "learning_rate": 1.0248341745830444e-05, "loss": 0.0062, "step": 6484 }, { "epoch": 1.52, "learning_rate": 1.0245808159520385e-05, "loss": 0.0306, "step": 6485 }, { "epoch": 1.52, "learning_rate": 1.0243274557422118e-05, "loss": 0.0257, "step": 6486 }, { "epoch": 1.52, "learning_rate": 1.024074093969837e-05, "loss": 0.1105, "step": 6487 }, { "epoch": 1.52, "learning_rate": 1.023820730651188e-05, "loss": 0.0122, "step": 6488 }, { "epoch": 1.52, "learning_rate": 1.0235673658025382e-05, "loss": 0.0186, "step": 6489 }, { "epoch": 1.52, "learning_rate": 1.023313999440161e-05, "loss": 0.0033, "step": 6490 }, { "epoch": 1.52, "learning_rate": 1.02306063158033e-05, "loss": 0.0123, "step": 6491 }, { "epoch": 1.52, "learning_rate": 1.0228072622393192e-05, "loss": 0.0176, "step": 6492 }, { "epoch": 1.52, "learning_rate": 1.0225538914334024e-05, "loss": 0.0591, "step": 6493 }, { "epoch": 1.52, "learning_rate": 1.0223005191788536e-05, "loss": 0.0438, "step": 6494 }, { "epoch": 1.52, "learning_rate": 1.0220471454919465e-05, "loss": 0.0121, "step": 6495 }, { "epoch": 1.52, "learning_rate": 1.021793770388956e-05, "loss": 0.0243, "step": 6496 }, { "epoch": 1.52, "learning_rate": 1.0215403938861555e-05, "loss": 0.019, "step": 6497 }, { "epoch": 1.53, "learning_rate": 1.0212870159998201e-05, "loss": 0.0235, "step": 6498 }, { "epoch": 1.53, "learning_rate": 1.0210336367462233e-05, "loss": 0.0668, "step": 6499 }, { "epoch": 1.53, "learning_rate": 1.0207802561416405e-05, "loss": 0.042, "step": 6500 }, { "epoch": 1.53, "learning_rate": 1.0205268742023453e-05, "loss": 0.0235, "step": 6501 }, { "epoch": 1.53, "learning_rate": 1.0202734909446134e-05, "loss": 0.0121, "step": 6502 }, { "epoch": 1.53, "learning_rate": 1.020020106384719e-05, "loss": 0.0086, "step": 6503 }, { "epoch": 1.53, "learning_rate": 1.0197667205389373e-05, "loss": 0.0524, "step": 6504 }, { "epoch": 1.53, "learning_rate": 1.0195133334235427e-05, "loss": 0.0135, "step": 6505 }, { "epoch": 1.53, "learning_rate": 1.0192599450548106e-05, "loss": 0.0258, "step": 6506 }, { "epoch": 1.53, "learning_rate": 1.0190065554490161e-05, "loss": 0.0285, "step": 6507 }, { "epoch": 1.53, "learning_rate": 1.0187531646224343e-05, "loss": 0.0259, "step": 6508 }, { "epoch": 1.53, "learning_rate": 1.0184997725913399e-05, "loss": 0.0157, "step": 6509 }, { "epoch": 1.53, "learning_rate": 1.018246379372009e-05, "loss": 0.0458, "step": 6510 }, { "epoch": 1.53, "learning_rate": 1.0179929849807166e-05, "loss": 0.0087, "step": 6511 }, { "epoch": 1.53, "learning_rate": 1.0177395894337386e-05, "loss": 0.0156, "step": 6512 }, { "epoch": 1.53, "learning_rate": 1.0174861927473496e-05, "loss": 0.0664, "step": 6513 }, { "epoch": 1.53, "learning_rate": 1.0172327949378264e-05, "loss": 0.0415, "step": 6514 }, { "epoch": 1.53, "learning_rate": 1.0169793960214438e-05, "loss": 0.039, "step": 6515 }, { "epoch": 1.53, "learning_rate": 1.0167259960144781e-05, "loss": 0.024, "step": 6516 }, { "epoch": 1.53, "learning_rate": 1.0164725949332048e-05, "loss": 0.0222, "step": 6517 }, { "epoch": 1.53, "learning_rate": 1.0162191927939e-05, "loss": 0.0292, "step": 6518 }, { "epoch": 1.53, "learning_rate": 1.0159657896128396e-05, "loss": 0.0102, "step": 6519 }, { "epoch": 1.53, "learning_rate": 1.0157123854062998e-05, "loss": 0.0393, "step": 6520 }, { "epoch": 1.53, "learning_rate": 1.015458980190556e-05, "loss": 0.0678, "step": 6521 }, { "epoch": 1.53, "learning_rate": 1.0152055739818855e-05, "loss": 0.01, "step": 6522 }, { "epoch": 1.53, "learning_rate": 1.0149521667965636e-05, "loss": 0.0042, "step": 6523 }, { "epoch": 1.53, "learning_rate": 1.0146987586508671e-05, "loss": 0.0435, "step": 6524 }, { "epoch": 1.53, "learning_rate": 1.0144453495610721e-05, "loss": 0.0145, "step": 6525 }, { "epoch": 1.53, "learning_rate": 1.0141919395434554e-05, "loss": 0.0352, "step": 6526 }, { "epoch": 1.53, "learning_rate": 1.013938528614293e-05, "loss": 0.0431, "step": 6527 }, { "epoch": 1.53, "learning_rate": 1.0136851167898612e-05, "loss": 0.0312, "step": 6528 }, { "epoch": 1.53, "learning_rate": 1.0134317040864371e-05, "loss": 0.0153, "step": 6529 }, { "epoch": 1.53, "learning_rate": 1.0131782905202978e-05, "loss": 0.0586, "step": 6530 }, { "epoch": 1.53, "learning_rate": 1.012924876107719e-05, "loss": 0.0786, "step": 6531 }, { "epoch": 1.53, "learning_rate": 1.0126714608649784e-05, "loss": 0.0544, "step": 6532 }, { "epoch": 1.53, "learning_rate": 1.0124180448083518e-05, "loss": 0.0226, "step": 6533 }, { "epoch": 1.53, "learning_rate": 1.0121646279541172e-05, "loss": 0.0086, "step": 6534 }, { "epoch": 1.53, "learning_rate": 1.0119112103185502e-05, "loss": 0.0028, "step": 6535 }, { "epoch": 1.53, "learning_rate": 1.0116577919179286e-05, "loss": 0.0894, "step": 6536 }, { "epoch": 1.53, "learning_rate": 1.0114043727685295e-05, "loss": 0.0043, "step": 6537 }, { "epoch": 1.53, "learning_rate": 1.0111509528866299e-05, "loss": 0.0111, "step": 6538 }, { "epoch": 1.53, "learning_rate": 1.0108975322885064e-05, "loss": 0.0154, "step": 6539 }, { "epoch": 1.53, "learning_rate": 1.010644110990437e-05, "loss": 0.0185, "step": 6540 }, { "epoch": 1.54, "learning_rate": 1.0103906890086981e-05, "loss": 0.025, "step": 6541 }, { "epoch": 1.54, "learning_rate": 1.0101372663595672e-05, "loss": 0.0234, "step": 6542 }, { "epoch": 1.54, "learning_rate": 1.0098838430593218e-05, "loss": 0.0332, "step": 6543 }, { "epoch": 1.54, "learning_rate": 1.009630419124239e-05, "loss": 0.0157, "step": 6544 }, { "epoch": 1.54, "learning_rate": 1.0093769945705965e-05, "loss": 0.0773, "step": 6545 }, { "epoch": 1.54, "learning_rate": 1.009123569414671e-05, "loss": 0.0714, "step": 6546 }, { "epoch": 1.54, "learning_rate": 1.0088701436727406e-05, "loss": 0.004, "step": 6547 }, { "epoch": 1.54, "learning_rate": 1.0086167173610827e-05, "loss": 0.0139, "step": 6548 }, { "epoch": 1.54, "learning_rate": 1.0083632904959745e-05, "loss": 0.0484, "step": 6549 }, { "epoch": 1.54, "learning_rate": 1.0081098630936943e-05, "loss": 0.0358, "step": 6550 }, { "epoch": 1.54, "learning_rate": 1.0078564351705187e-05, "loss": 0.042, "step": 6551 }, { "epoch": 1.54, "learning_rate": 1.0076030067427258e-05, "loss": 0.0394, "step": 6552 }, { "epoch": 1.54, "learning_rate": 1.0073495778265936e-05, "loss": 0.0158, "step": 6553 }, { "epoch": 1.54, "learning_rate": 1.0070961484383989e-05, "loss": 0.0624, "step": 6554 }, { "epoch": 1.54, "learning_rate": 1.0068427185944204e-05, "loss": 0.0321, "step": 6555 }, { "epoch": 1.54, "learning_rate": 1.0065892883109354e-05, "loss": 0.0034, "step": 6556 }, { "epoch": 1.54, "learning_rate": 1.0063358576042214e-05, "loss": 0.0177, "step": 6557 }, { "epoch": 1.54, "learning_rate": 1.0060824264905569e-05, "loss": 0.0113, "step": 6558 }, { "epoch": 1.54, "learning_rate": 1.005828994986219e-05, "loss": 0.0023, "step": 6559 }, { "epoch": 1.54, "learning_rate": 1.0055755631074862e-05, "loss": 0.0069, "step": 6560 }, { "epoch": 1.54, "learning_rate": 1.0053221308706356e-05, "loss": 0.0485, "step": 6561 }, { "epoch": 1.54, "learning_rate": 1.0050686982919458e-05, "loss": 0.0505, "step": 6562 }, { "epoch": 1.54, "learning_rate": 1.0048152653876947e-05, "loss": 0.0605, "step": 6563 }, { "epoch": 1.54, "learning_rate": 1.0045618321741598e-05, "loss": 0.0297, "step": 6564 }, { "epoch": 1.54, "learning_rate": 1.0043083986676192e-05, "loss": 0.0087, "step": 6565 }, { "epoch": 1.54, "learning_rate": 1.0040549648843515e-05, "loss": 0.0081, "step": 6566 }, { "epoch": 1.54, "learning_rate": 1.0038015308406335e-05, "loss": 0.0186, "step": 6567 }, { "epoch": 1.54, "learning_rate": 1.0035480965527445e-05, "loss": 0.0109, "step": 6568 }, { "epoch": 1.54, "learning_rate": 1.0032946620369617e-05, "loss": 0.0088, "step": 6569 }, { "epoch": 1.54, "learning_rate": 1.0030412273095634e-05, "loss": 0.0156, "step": 6570 }, { "epoch": 1.54, "learning_rate": 1.0027877923868276e-05, "loss": 0.0277, "step": 6571 }, { "epoch": 1.54, "learning_rate": 1.0025343572850323e-05, "loss": 0.0218, "step": 6572 }, { "epoch": 1.54, "learning_rate": 1.002280922020456e-05, "loss": 0.065, "step": 6573 }, { "epoch": 1.54, "learning_rate": 1.0020274866093767e-05, "loss": 0.0394, "step": 6574 }, { "epoch": 1.54, "learning_rate": 1.0017740510680718e-05, "loss": 0.0278, "step": 6575 }, { "epoch": 1.54, "learning_rate": 1.0015206154128206e-05, "loss": 0.0535, "step": 6576 }, { "epoch": 1.54, "learning_rate": 1.0012671796599e-05, "loss": 0.0023, "step": 6577 }, { "epoch": 1.54, "learning_rate": 1.0010137438255888e-05, "loss": 0.0074, "step": 6578 }, { "epoch": 1.54, "learning_rate": 1.0007603079261652e-05, "loss": 0.0338, "step": 6579 }, { "epoch": 1.54, "learning_rate": 1.000506871977907e-05, "loss": 0.0379, "step": 6580 }, { "epoch": 1.54, "learning_rate": 1.0002534359970928e-05, "loss": 0.0028, "step": 6581 }, { "epoch": 1.54, "learning_rate": 1e-05, "loss": 0.0036, "step": 6582 }, { "epoch": 1.54, "learning_rate": 9.997465640029076e-06, "loss": 0.052, "step": 6583 }, { "epoch": 1.55, "learning_rate": 9.994931280220933e-06, "loss": 0.0022, "step": 6584 }, { "epoch": 1.55, "learning_rate": 9.992396920738351e-06, "loss": 0.0519, "step": 6585 }, { "epoch": 1.55, "learning_rate": 9.989862561744114e-06, "loss": 0.0004, "step": 6586 }, { "epoch": 1.55, "learning_rate": 9.987328203401003e-06, "loss": 0.06, "step": 6587 }, { "epoch": 1.55, "learning_rate": 9.984793845871801e-06, "loss": 0.0782, "step": 6588 }, { "epoch": 1.55, "learning_rate": 9.982259489319284e-06, "loss": 0.0041, "step": 6589 }, { "epoch": 1.55, "learning_rate": 9.97972513390624e-06, "loss": 0.0043, "step": 6590 }, { "epoch": 1.55, "learning_rate": 9.977190779795442e-06, "loss": 0.018, "step": 6591 }, { "epoch": 1.55, "learning_rate": 9.974656427149677e-06, "loss": 0.029, "step": 6592 }, { "epoch": 1.55, "learning_rate": 9.972122076131725e-06, "loss": 0.0667, "step": 6593 }, { "epoch": 1.55, "learning_rate": 9.969587726904368e-06, "loss": 0.0139, "step": 6594 }, { "epoch": 1.55, "learning_rate": 9.967053379630387e-06, "loss": 0.0576, "step": 6595 }, { "epoch": 1.55, "learning_rate": 9.964519034472558e-06, "loss": 0.0029, "step": 6596 }, { "epoch": 1.55, "learning_rate": 9.961984691593667e-06, "loss": 0.024, "step": 6597 }, { "epoch": 1.55, "learning_rate": 9.95945035115649e-06, "loss": 0.0291, "step": 6598 }, { "epoch": 1.55, "learning_rate": 9.956916013323811e-06, "loss": 0.0589, "step": 6599 }, { "epoch": 1.55, "learning_rate": 9.954381678258406e-06, "loss": 0.021, "step": 6600 }, { "epoch": 1.55, "learning_rate": 9.951847346123055e-06, "loss": 0.0018, "step": 6601 }, { "epoch": 1.55, "learning_rate": 9.949313017080545e-06, "loss": 0.0079, "step": 6602 }, { "epoch": 1.55, "learning_rate": 9.946778691293645e-06, "loss": 0.0177, "step": 6603 }, { "epoch": 1.55, "learning_rate": 9.944244368925145e-06, "loss": 0.023, "step": 6604 }, { "epoch": 1.55, "learning_rate": 9.941710050137812e-06, "loss": 0.0034, "step": 6605 }, { "epoch": 1.55, "learning_rate": 9.939175735094436e-06, "loss": 0.008, "step": 6606 }, { "epoch": 1.55, "learning_rate": 9.93664142395779e-06, "loss": 0.0878, "step": 6607 }, { "epoch": 1.55, "learning_rate": 9.934107116890648e-06, "loss": 0.0638, "step": 6608 }, { "epoch": 1.55, "learning_rate": 9.9315728140558e-06, "loss": 0.0383, "step": 6609 }, { "epoch": 1.55, "learning_rate": 9.92903851561601e-06, "loss": 0.0871, "step": 6610 }, { "epoch": 1.55, "learning_rate": 9.926504221734067e-06, "loss": 0.0391, "step": 6611 }, { "epoch": 1.55, "learning_rate": 9.923969932572742e-06, "loss": 0.0135, "step": 6612 }, { "epoch": 1.55, "learning_rate": 9.921435648294817e-06, "loss": 0.0164, "step": 6613 }, { "epoch": 1.55, "learning_rate": 9.91890136906306e-06, "loss": 0.0174, "step": 6614 }, { "epoch": 1.55, "learning_rate": 9.916367095040256e-06, "loss": 0.022, "step": 6615 }, { "epoch": 1.55, "learning_rate": 9.913832826389174e-06, "loss": 0.0089, "step": 6616 }, { "epoch": 1.55, "learning_rate": 9.911298563272594e-06, "loss": 0.0141, "step": 6617 }, { "epoch": 1.55, "learning_rate": 9.908764305853293e-06, "loss": 0.02, "step": 6618 }, { "epoch": 1.55, "learning_rate": 9.90623005429404e-06, "loss": 0.0357, "step": 6619 }, { "epoch": 1.55, "learning_rate": 9.903695808757615e-06, "loss": 0.01, "step": 6620 }, { "epoch": 1.55, "learning_rate": 9.901161569406785e-06, "loss": 0.0289, "step": 6621 }, { "epoch": 1.55, "learning_rate": 9.898627336404332e-06, "loss": 0.0621, "step": 6622 }, { "epoch": 1.55, "learning_rate": 9.896093109913024e-06, "loss": 0.0534, "step": 6623 }, { "epoch": 1.55, "learning_rate": 9.893558890095636e-06, "loss": 0.0562, "step": 6624 }, { "epoch": 1.55, "learning_rate": 9.891024677114938e-06, "loss": 0.0353, "step": 6625 }, { "epoch": 1.56, "learning_rate": 9.888490471133701e-06, "loss": 0.0125, "step": 6626 }, { "epoch": 1.56, "learning_rate": 9.885956272314707e-06, "loss": 0.1139, "step": 6627 }, { "epoch": 1.56, "learning_rate": 9.883422080820714e-06, "loss": 0.0122, "step": 6628 }, { "epoch": 1.56, "learning_rate": 9.8808878968145e-06, "loss": 0.0509, "step": 6629 }, { "epoch": 1.56, "learning_rate": 9.878353720458833e-06, "loss": 0.0238, "step": 6630 }, { "epoch": 1.56, "learning_rate": 9.875819551916485e-06, "loss": 0.0197, "step": 6631 }, { "epoch": 1.56, "learning_rate": 9.87328539135022e-06, "loss": 0.0238, "step": 6632 }, { "epoch": 1.56, "learning_rate": 9.870751238922812e-06, "loss": 0.0367, "step": 6633 }, { "epoch": 1.56, "learning_rate": 9.868217094797025e-06, "loss": 0.0545, "step": 6634 }, { "epoch": 1.56, "learning_rate": 9.865682959135627e-06, "loss": 0.04, "step": 6635 }, { "epoch": 1.56, "learning_rate": 9.86314883210139e-06, "loss": 0.0096, "step": 6636 }, { "epoch": 1.56, "learning_rate": 9.860614713857076e-06, "loss": 0.0416, "step": 6637 }, { "epoch": 1.56, "learning_rate": 9.858080604565451e-06, "loss": 0.0081, "step": 6638 }, { "epoch": 1.56, "learning_rate": 9.85554650438928e-06, "loss": 0.017, "step": 6639 }, { "epoch": 1.56, "learning_rate": 9.853012413491332e-06, "loss": 0.0126, "step": 6640 }, { "epoch": 1.56, "learning_rate": 9.850478332034366e-06, "loss": 0.0439, "step": 6641 }, { "epoch": 1.56, "learning_rate": 9.84794426018115e-06, "loss": 0.0325, "step": 6642 }, { "epoch": 1.56, "learning_rate": 9.845410198094441e-06, "loss": 0.0151, "step": 6643 }, { "epoch": 1.56, "learning_rate": 9.842876145937004e-06, "loss": 0.0328, "step": 6644 }, { "epoch": 1.56, "learning_rate": 9.840342103871607e-06, "loss": 0.0411, "step": 6645 }, { "epoch": 1.56, "learning_rate": 9.837808072061e-06, "loss": 0.0178, "step": 6646 }, { "epoch": 1.56, "learning_rate": 9.835274050667953e-06, "loss": 0.0321, "step": 6647 }, { "epoch": 1.56, "learning_rate": 9.83274003985522e-06, "loss": 0.0154, "step": 6648 }, { "epoch": 1.56, "learning_rate": 9.830206039785564e-06, "loss": 0.004, "step": 6649 }, { "epoch": 1.56, "learning_rate": 9.827672050621737e-06, "loss": 0.0032, "step": 6650 }, { "epoch": 1.56, "learning_rate": 9.825138072526504e-06, "loss": 0.0526, "step": 6651 }, { "epoch": 1.56, "learning_rate": 9.82260410566262e-06, "loss": 0.0092, "step": 6652 }, { "epoch": 1.56, "learning_rate": 9.820070150192835e-06, "loss": 0.0015, "step": 6653 }, { "epoch": 1.56, "learning_rate": 9.817536206279915e-06, "loss": 0.0801, "step": 6654 }, { "epoch": 1.56, "learning_rate": 9.815002274086603e-06, "loss": 0.0708, "step": 6655 }, { "epoch": 1.56, "learning_rate": 9.812468353775664e-06, "loss": 0.0119, "step": 6656 }, { "epoch": 1.56, "learning_rate": 9.809934445509844e-06, "loss": 0.0214, "step": 6657 }, { "epoch": 1.56, "learning_rate": 9.807400549451898e-06, "loss": 0.0721, "step": 6658 }, { "epoch": 1.56, "learning_rate": 9.804866665764575e-06, "loss": 0.0405, "step": 6659 }, { "epoch": 1.56, "learning_rate": 9.802332794610627e-06, "loss": 0.0242, "step": 6660 }, { "epoch": 1.56, "learning_rate": 9.799798936152812e-06, "loss": 0.0065, "step": 6661 }, { "epoch": 1.56, "learning_rate": 9.797265090553866e-06, "loss": 0.0147, "step": 6662 }, { "epoch": 1.56, "learning_rate": 9.79473125797655e-06, "loss": 0.0013, "step": 6663 }, { "epoch": 1.56, "learning_rate": 9.7921974385836e-06, "loss": 0.0019, "step": 6664 }, { "epoch": 1.56, "learning_rate": 9.78966363253777e-06, "loss": 0.0352, "step": 6665 }, { "epoch": 1.56, "learning_rate": 9.787129840001804e-06, "loss": 0.0584, "step": 6666 }, { "epoch": 1.56, "learning_rate": 9.784596061138449e-06, "loss": 0.0233, "step": 6667 }, { "epoch": 1.56, "learning_rate": 9.782062296110443e-06, "loss": 0.1116, "step": 6668 }, { "epoch": 1.57, "learning_rate": 9.779528545080535e-06, "loss": 0.0051, "step": 6669 }, { "epoch": 1.57, "learning_rate": 9.776994808211467e-06, "loss": 0.0887, "step": 6670 }, { "epoch": 1.57, "learning_rate": 9.774461085665977e-06, "loss": 0.1339, "step": 6671 }, { "epoch": 1.57, "learning_rate": 9.771927377606811e-06, "loss": 0.0252, "step": 6672 }, { "epoch": 1.57, "learning_rate": 9.769393684196702e-06, "loss": 0.0243, "step": 6673 }, { "epoch": 1.57, "learning_rate": 9.766860005598395e-06, "loss": 0.0165, "step": 6674 }, { "epoch": 1.57, "learning_rate": 9.764326341974621e-06, "loss": 0.0105, "step": 6675 }, { "epoch": 1.57, "learning_rate": 9.761792693488124e-06, "loss": 0.0364, "step": 6676 }, { "epoch": 1.57, "learning_rate": 9.759259060301632e-06, "loss": 0.011, "step": 6677 }, { "epoch": 1.57, "learning_rate": 9.756725442577884e-06, "loss": 0.0086, "step": 6678 }, { "epoch": 1.57, "learning_rate": 9.754191840479617e-06, "loss": 0.0467, "step": 6679 }, { "epoch": 1.57, "learning_rate": 9.751658254169556e-06, "loss": 0.0064, "step": 6680 }, { "epoch": 1.57, "learning_rate": 9.749124683810444e-06, "loss": 0.0193, "step": 6681 }, { "epoch": 1.57, "learning_rate": 9.746591129564997e-06, "loss": 0.0152, "step": 6682 }, { "epoch": 1.57, "learning_rate": 9.744057591595957e-06, "loss": 0.0136, "step": 6683 }, { "epoch": 1.57, "learning_rate": 9.741524070066046e-06, "loss": 0.0011, "step": 6684 }, { "epoch": 1.57, "learning_rate": 9.738990565137996e-06, "loss": 0.0303, "step": 6685 }, { "epoch": 1.57, "learning_rate": 9.736457076974529e-06, "loss": 0.0219, "step": 6686 }, { "epoch": 1.57, "learning_rate": 9.733923605738373e-06, "loss": 0.0254, "step": 6687 }, { "epoch": 1.57, "learning_rate": 9.731390151592254e-06, "loss": 0.0466, "step": 6688 }, { "epoch": 1.57, "learning_rate": 9.728856714698892e-06, "loss": 0.0204, "step": 6689 }, { "epoch": 1.57, "learning_rate": 9.726323295221011e-06, "loss": 0.003, "step": 6690 }, { "epoch": 1.57, "learning_rate": 9.723789893321331e-06, "loss": 0.0414, "step": 6691 }, { "epoch": 1.57, "learning_rate": 9.721256509162575e-06, "loss": 0.0339, "step": 6692 }, { "epoch": 1.57, "learning_rate": 9.718723142907455e-06, "loss": 0.1076, "step": 6693 }, { "epoch": 1.57, "learning_rate": 9.716189794718692e-06, "loss": 0.0128, "step": 6694 }, { "epoch": 1.57, "learning_rate": 9.713656464759009e-06, "loss": 0.0083, "step": 6695 }, { "epoch": 1.57, "learning_rate": 9.71112315319111e-06, "loss": 0.0405, "step": 6696 }, { "epoch": 1.57, "learning_rate": 9.708589860177722e-06, "loss": 0.005, "step": 6697 }, { "epoch": 1.57, "learning_rate": 9.706056585881542e-06, "loss": 0.0452, "step": 6698 }, { "epoch": 1.57, "learning_rate": 9.703523330465299e-06, "loss": 0.0121, "step": 6699 }, { "epoch": 1.57, "learning_rate": 9.700990094091686e-06, "loss": 0.0109, "step": 6700 }, { "epoch": 1.57, "learning_rate": 9.698456876923427e-06, "loss": 0.0192, "step": 6701 }, { "epoch": 1.57, "learning_rate": 9.695923679123222e-06, "loss": 0.0011, "step": 6702 }, { "epoch": 1.57, "learning_rate": 9.69339050085378e-06, "loss": 0.0475, "step": 6703 }, { "epoch": 1.57, "learning_rate": 9.69085734227781e-06, "loss": 0.0324, "step": 6704 }, { "epoch": 1.57, "learning_rate": 9.688324203558008e-06, "loss": 0.0221, "step": 6705 }, { "epoch": 1.57, "learning_rate": 9.685791084857085e-06, "loss": 0.0276, "step": 6706 }, { "epoch": 1.57, "learning_rate": 9.683257986337739e-06, "loss": 0.0022, "step": 6707 }, { "epoch": 1.57, "learning_rate": 9.68072490816267e-06, "loss": 0.0407, "step": 6708 }, { "epoch": 1.57, "learning_rate": 9.678191850494578e-06, "loss": 0.0072, "step": 6709 }, { "epoch": 1.57, "learning_rate": 9.675658813496163e-06, "loss": 0.0017, "step": 6710 }, { "epoch": 1.58, "learning_rate": 9.673125797330117e-06, "loss": 0.0007, "step": 6711 }, { "epoch": 1.58, "learning_rate": 9.670592802159136e-06, "loss": 0.0073, "step": 6712 }, { "epoch": 1.58, "learning_rate": 9.668059828145921e-06, "loss": 0.0412, "step": 6713 }, { "epoch": 1.58, "learning_rate": 9.665526875453151e-06, "loss": 0.0628, "step": 6714 }, { "epoch": 1.58, "learning_rate": 9.662993944243534e-06, "loss": 0.1223, "step": 6715 }, { "epoch": 1.58, "learning_rate": 9.660461034679743e-06, "loss": 0.026, "step": 6716 }, { "epoch": 1.58, "learning_rate": 9.65792814692448e-06, "loss": 0.1481, "step": 6717 }, { "epoch": 1.58, "learning_rate": 9.655395281140419e-06, "loss": 0.0142, "step": 6718 }, { "epoch": 1.58, "learning_rate": 9.652862437490255e-06, "loss": 0.0933, "step": 6719 }, { "epoch": 1.58, "learning_rate": 9.65032961613667e-06, "loss": 0.0061, "step": 6720 }, { "epoch": 1.58, "learning_rate": 9.647796817242343e-06, "loss": 0.0121, "step": 6721 }, { "epoch": 1.58, "learning_rate": 9.645264040969961e-06, "loss": 0.0608, "step": 6722 }, { "epoch": 1.58, "learning_rate": 9.6427312874822e-06, "loss": 0.002, "step": 6723 }, { "epoch": 1.58, "learning_rate": 9.64019855694174e-06, "loss": 0.0062, "step": 6724 }, { "epoch": 1.58, "learning_rate": 9.637665849511254e-06, "loss": 0.0202, "step": 6725 }, { "epoch": 1.58, "learning_rate": 9.635133165353424e-06, "loss": 0.044, "step": 6726 }, { "epoch": 1.58, "learning_rate": 9.632600504630915e-06, "loss": 0.0156, "step": 6727 }, { "epoch": 1.58, "learning_rate": 9.630067867506404e-06, "loss": 0.013, "step": 6728 }, { "epoch": 1.58, "learning_rate": 9.627535254142567e-06, "loss": 0.023, "step": 6729 }, { "epoch": 1.58, "learning_rate": 9.625002664702064e-06, "loss": 0.0281, "step": 6730 }, { "epoch": 1.58, "learning_rate": 9.622470099347572e-06, "loss": 0.0505, "step": 6731 }, { "epoch": 1.58, "learning_rate": 9.619937558241747e-06, "loss": 0.0344, "step": 6732 }, { "epoch": 1.58, "learning_rate": 9.617405041547265e-06, "loss": 0.0148, "step": 6733 }, { "epoch": 1.58, "learning_rate": 9.614872549426774e-06, "loss": 0.0409, "step": 6734 }, { "epoch": 1.58, "learning_rate": 9.612340082042953e-06, "loss": 0.0409, "step": 6735 }, { "epoch": 1.58, "learning_rate": 9.609807639558446e-06, "loss": 0.1013, "step": 6736 }, { "epoch": 1.58, "learning_rate": 9.60727522213592e-06, "loss": 0.0114, "step": 6737 }, { "epoch": 1.58, "learning_rate": 9.604742829938035e-06, "loss": 0.034, "step": 6738 }, { "epoch": 1.58, "learning_rate": 9.602210463127436e-06, "loss": 0.0441, "step": 6739 }, { "epoch": 1.58, "learning_rate": 9.599678121866784e-06, "loss": 0.0107, "step": 6740 }, { "epoch": 1.58, "learning_rate": 9.597145806318728e-06, "loss": 0.0352, "step": 6741 }, { "epoch": 1.58, "learning_rate": 9.59461351664592e-06, "loss": 0.0286, "step": 6742 }, { "epoch": 1.58, "learning_rate": 9.592081253011003e-06, "loss": 0.0402, "step": 6743 }, { "epoch": 1.58, "learning_rate": 9.589549015576634e-06, "loss": 0.0107, "step": 6744 }, { "epoch": 1.58, "learning_rate": 9.587016804505446e-06, "loss": 0.0543, "step": 6745 }, { "epoch": 1.58, "learning_rate": 9.584484619960088e-06, "loss": 0.0323, "step": 6746 }, { "epoch": 1.58, "learning_rate": 9.58195246210321e-06, "loss": 0.0167, "step": 6747 }, { "epoch": 1.58, "learning_rate": 9.579420331097435e-06, "loss": 0.0042, "step": 6748 }, { "epoch": 1.58, "learning_rate": 9.576888227105417e-06, "loss": 0.0308, "step": 6749 }, { "epoch": 1.58, "learning_rate": 9.574356150289782e-06, "loss": 0.0317, "step": 6750 }, { "epoch": 1.58, "learning_rate": 9.571824100813173e-06, "loss": 0.0684, "step": 6751 }, { "epoch": 1.58, "learning_rate": 9.569292078838213e-06, "loss": 0.0168, "step": 6752 }, { "epoch": 1.58, "learning_rate": 9.566760084527546e-06, "loss": 0.0099, "step": 6753 }, { "epoch": 1.59, "learning_rate": 9.564228118043786e-06, "loss": 0.048, "step": 6754 }, { "epoch": 1.59, "learning_rate": 9.561696179549573e-06, "loss": 0.0172, "step": 6755 }, { "epoch": 1.59, "learning_rate": 9.559164269207531e-06, "loss": 0.0351, "step": 6756 }, { "epoch": 1.59, "learning_rate": 9.556632387180282e-06, "loss": 0.0029, "step": 6757 }, { "epoch": 1.59, "learning_rate": 9.554100533630449e-06, "loss": 0.008, "step": 6758 }, { "epoch": 1.59, "learning_rate": 9.55156870872065e-06, "loss": 0.0102, "step": 6759 }, { "epoch": 1.59, "learning_rate": 9.549036912613507e-06, "loss": 0.0183, "step": 6760 }, { "epoch": 1.59, "learning_rate": 9.546505145471635e-06, "loss": 0.0052, "step": 6761 }, { "epoch": 1.59, "learning_rate": 9.54397340745765e-06, "loss": 0.0052, "step": 6762 }, { "epoch": 1.59, "learning_rate": 9.541441698734163e-06, "loss": 0.0044, "step": 6763 }, { "epoch": 1.59, "learning_rate": 9.538910019463786e-06, "loss": 0.0046, "step": 6764 }, { "epoch": 1.59, "learning_rate": 9.536378369809132e-06, "loss": 0.0576, "step": 6765 }, { "epoch": 1.59, "learning_rate": 9.533846749932801e-06, "loss": 0.0391, "step": 6766 }, { "epoch": 1.59, "learning_rate": 9.53131515999741e-06, "loss": 0.0531, "step": 6767 }, { "epoch": 1.59, "learning_rate": 9.528783600165547e-06, "loss": 0.0035, "step": 6768 }, { "epoch": 1.59, "learning_rate": 9.526252070599828e-06, "loss": 0.01, "step": 6769 }, { "epoch": 1.59, "learning_rate": 9.523720571462841e-06, "loss": 0.0175, "step": 6770 }, { "epoch": 1.59, "learning_rate": 9.521189102917191e-06, "loss": 0.0215, "step": 6771 }, { "epoch": 1.59, "learning_rate": 9.518657665125474e-06, "loss": 0.04, "step": 6772 }, { "epoch": 1.59, "learning_rate": 9.51612625825028e-06, "loss": 0.0032, "step": 6773 }, { "epoch": 1.59, "learning_rate": 9.513594882454203e-06, "loss": 0.0069, "step": 6774 }, { "epoch": 1.59, "learning_rate": 9.511063537899832e-06, "loss": 0.0172, "step": 6775 }, { "epoch": 1.59, "learning_rate": 9.508532224749755e-06, "loss": 0.0213, "step": 6776 }, { "epoch": 1.59, "learning_rate": 9.506000943166557e-06, "loss": 0.0029, "step": 6777 }, { "epoch": 1.59, "learning_rate": 9.503469693312823e-06, "loss": 0.0568, "step": 6778 }, { "epoch": 1.59, "learning_rate": 9.500938475351134e-06, "loss": 0.0176, "step": 6779 }, { "epoch": 1.59, "learning_rate": 9.498407289444066e-06, "loss": 0.0795, "step": 6780 }, { "epoch": 1.59, "learning_rate": 9.495876135754209e-06, "loss": 0.004, "step": 6781 }, { "epoch": 1.59, "learning_rate": 9.493345014444122e-06, "loss": 0.032, "step": 6782 }, { "epoch": 1.59, "learning_rate": 9.490813925676392e-06, "loss": 0.1216, "step": 6783 }, { "epoch": 1.59, "learning_rate": 9.488282869613578e-06, "loss": 0.0344, "step": 6784 }, { "epoch": 1.59, "learning_rate": 9.485751846418263e-06, "loss": 0.0021, "step": 6785 }, { "epoch": 1.59, "learning_rate": 9.483220856253e-06, "loss": 0.0091, "step": 6786 }, { "epoch": 1.59, "learning_rate": 9.48068989928037e-06, "loss": 0.0037, "step": 6787 }, { "epoch": 1.59, "learning_rate": 9.478158975662917e-06, "loss": 0.032, "step": 6788 }, { "epoch": 1.59, "learning_rate": 9.475628085563214e-06, "loss": 0.0363, "step": 6789 }, { "epoch": 1.59, "learning_rate": 9.473097229143821e-06, "loss": 0.0611, "step": 6790 }, { "epoch": 1.59, "learning_rate": 9.470566406567288e-06, "loss": 0.122, "step": 6791 }, { "epoch": 1.59, "learning_rate": 9.468035617996172e-06, "loss": 0.0742, "step": 6792 }, { "epoch": 1.59, "learning_rate": 9.465504863593026e-06, "loss": 0.015, "step": 6793 }, { "epoch": 1.59, "learning_rate": 9.4629741435204e-06, "loss": 0.0233, "step": 6794 }, { "epoch": 1.59, "learning_rate": 9.460443457940837e-06, "loss": 0.0519, "step": 6795 }, { "epoch": 1.59, "learning_rate": 9.457912807016887e-06, "loss": 0.078, "step": 6796 }, { "epoch": 1.6, "learning_rate": 9.455382190911092e-06, "loss": 0.0114, "step": 6797 }, { "epoch": 1.6, "learning_rate": 9.452851609785991e-06, "loss": 0.064, "step": 6798 }, { "epoch": 1.6, "learning_rate": 9.450321063804131e-06, "loss": 0.0538, "step": 6799 }, { "epoch": 1.6, "learning_rate": 9.447790553128034e-06, "loss": 0.1197, "step": 6800 }, { "epoch": 1.6, "learning_rate": 9.445260077920251e-06, "loss": 0.0081, "step": 6801 }, { "epoch": 1.6, "learning_rate": 9.442729638343297e-06, "loss": 0.0247, "step": 6802 }, { "epoch": 1.6, "learning_rate": 9.440199234559719e-06, "loss": 0.048, "step": 6803 }, { "epoch": 1.6, "learning_rate": 9.437668866732027e-06, "loss": 0.022, "step": 6804 }, { "epoch": 1.6, "learning_rate": 9.435138535022762e-06, "loss": 0.019, "step": 6805 }, { "epoch": 1.6, "learning_rate": 9.432608239594432e-06, "loss": 0.0058, "step": 6806 }, { "epoch": 1.6, "learning_rate": 9.430077980609567e-06, "loss": 0.0485, "step": 6807 }, { "epoch": 1.6, "learning_rate": 9.427547758230683e-06, "loss": 0.018, "step": 6808 }, { "epoch": 1.6, "learning_rate": 9.425017572620295e-06, "loss": 0.0215, "step": 6809 }, { "epoch": 1.6, "learning_rate": 9.422487423940918e-06, "loss": 0.0015, "step": 6810 }, { "epoch": 1.6, "learning_rate": 9.41995731235506e-06, "loss": 0.0021, "step": 6811 }, { "epoch": 1.6, "learning_rate": 9.417427238025233e-06, "loss": 0.0176, "step": 6812 }, { "epoch": 1.6, "learning_rate": 9.41489720111394e-06, "loss": 0.0246, "step": 6813 }, { "epoch": 1.6, "learning_rate": 9.412367201783685e-06, "loss": 0.014, "step": 6814 }, { "epoch": 1.6, "learning_rate": 9.409837240196972e-06, "loss": 0.0369, "step": 6815 }, { "epoch": 1.6, "learning_rate": 9.407307316516295e-06, "loss": 0.0027, "step": 6816 }, { "epoch": 1.6, "learning_rate": 9.40477743090416e-06, "loss": 0.0134, "step": 6817 }, { "epoch": 1.6, "learning_rate": 9.402247583523047e-06, "loss": 0.0207, "step": 6818 }, { "epoch": 1.6, "learning_rate": 9.399717774535464e-06, "loss": 0.1041, "step": 6819 }, { "epoch": 1.6, "learning_rate": 9.397188004103884e-06, "loss": 0.0061, "step": 6820 }, { "epoch": 1.6, "learning_rate": 9.39465827239081e-06, "loss": 0.0186, "step": 6821 }, { "epoch": 1.6, "learning_rate": 9.392128579558709e-06, "loss": 0.0041, "step": 6822 }, { "epoch": 1.6, "learning_rate": 9.389598925770074e-06, "loss": 0.0392, "step": 6823 }, { "epoch": 1.6, "learning_rate": 9.387069311187383e-06, "loss": 0.0049, "step": 6824 }, { "epoch": 1.6, "learning_rate": 9.384539735973109e-06, "loss": 0.0682, "step": 6825 }, { "epoch": 1.6, "learning_rate": 9.382010200289731e-06, "loss": 0.0522, "step": 6826 }, { "epoch": 1.6, "learning_rate": 9.379480704299714e-06, "loss": 0.0471, "step": 6827 }, { "epoch": 1.6, "learning_rate": 9.376951248165535e-06, "loss": 0.0295, "step": 6828 }, { "epoch": 1.6, "learning_rate": 9.374421832049653e-06, "loss": 0.0259, "step": 6829 }, { "epoch": 1.6, "learning_rate": 9.371892456114537e-06, "loss": 0.0492, "step": 6830 }, { "epoch": 1.6, "learning_rate": 9.369363120522645e-06, "loss": 0.0841, "step": 6831 }, { "epoch": 1.6, "learning_rate": 9.366833825436438e-06, "loss": 0.0377, "step": 6832 }, { "epoch": 1.6, "learning_rate": 9.364304571018373e-06, "loss": 0.012, "step": 6833 }, { "epoch": 1.6, "learning_rate": 9.361775357430898e-06, "loss": 0.0508, "step": 6834 }, { "epoch": 1.6, "learning_rate": 9.359246184836471e-06, "loss": 0.0435, "step": 6835 }, { "epoch": 1.6, "learning_rate": 9.356717053397533e-06, "loss": 0.029, "step": 6836 }, { "epoch": 1.6, "learning_rate": 9.354187963276539e-06, "loss": 0.0114, "step": 6837 }, { "epoch": 1.6, "learning_rate": 9.351658914635923e-06, "loss": 0.0103, "step": 6838 }, { "epoch": 1.61, "learning_rate": 9.349129907638132e-06, "loss": 0.0273, "step": 6839 }, { "epoch": 1.61, "learning_rate": 9.346600942445596e-06, "loss": 0.0041, "step": 6840 }, { "epoch": 1.61, "learning_rate": 9.344072019220757e-06, "loss": 0.0064, "step": 6841 }, { "epoch": 1.61, "learning_rate": 9.341543138126045e-06, "loss": 0.0374, "step": 6842 }, { "epoch": 1.61, "learning_rate": 9.339014299323888e-06, "loss": 0.0058, "step": 6843 }, { "epoch": 1.61, "learning_rate": 9.336485502976716e-06, "loss": 0.005, "step": 6844 }, { "epoch": 1.61, "learning_rate": 9.333956749246951e-06, "loss": 0.0205, "step": 6845 }, { "epoch": 1.61, "learning_rate": 9.331428038297014e-06, "loss": 0.0224, "step": 6846 }, { "epoch": 1.61, "learning_rate": 9.328899370289323e-06, "loss": 0.002, "step": 6847 }, { "epoch": 1.61, "learning_rate": 9.326370745386299e-06, "loss": 0.031, "step": 6848 }, { "epoch": 1.61, "learning_rate": 9.323842163750346e-06, "loss": 0.0192, "step": 6849 }, { "epoch": 1.61, "learning_rate": 9.321313625543881e-06, "loss": 0.0422, "step": 6850 }, { "epoch": 1.61, "learning_rate": 9.318785130929312e-06, "loss": 0.0794, "step": 6851 }, { "epoch": 1.61, "learning_rate": 9.316256680069039e-06, "loss": 0.0308, "step": 6852 }, { "epoch": 1.61, "learning_rate": 9.313728273125469e-06, "loss": 0.0024, "step": 6853 }, { "epoch": 1.61, "learning_rate": 9.311199910260994e-06, "loss": 0.024, "step": 6854 }, { "epoch": 1.61, "learning_rate": 9.308671591638023e-06, "loss": 0.0158, "step": 6855 }, { "epoch": 1.61, "learning_rate": 9.306143317418932e-06, "loss": 0.0326, "step": 6856 }, { "epoch": 1.61, "learning_rate": 9.303615087766125e-06, "loss": 0.0073, "step": 6857 }, { "epoch": 1.61, "learning_rate": 9.301086902841986e-06, "loss": 0.0025, "step": 6858 }, { "epoch": 1.61, "learning_rate": 9.2985587628089e-06, "loss": 0.0089, "step": 6859 }, { "epoch": 1.61, "learning_rate": 9.296030667829249e-06, "loss": 0.0077, "step": 6860 }, { "epoch": 1.61, "learning_rate": 9.293502618065409e-06, "loss": 0.0235, "step": 6861 }, { "epoch": 1.61, "learning_rate": 9.290974613679762e-06, "loss": 0.0008, "step": 6862 }, { "epoch": 1.61, "learning_rate": 9.288446654834675e-06, "loss": 0.0047, "step": 6863 }, { "epoch": 1.61, "learning_rate": 9.285918741692521e-06, "loss": 0.0152, "step": 6864 }, { "epoch": 1.61, "learning_rate": 9.283390874415668e-06, "loss": 0.0204, "step": 6865 }, { "epoch": 1.61, "learning_rate": 9.28086305316648e-06, "loss": 0.0212, "step": 6866 }, { "epoch": 1.61, "learning_rate": 9.278335278107318e-06, "loss": 0.0052, "step": 6867 }, { "epoch": 1.61, "learning_rate": 9.27580754940054e-06, "loss": 0.0081, "step": 6868 }, { "epoch": 1.61, "learning_rate": 9.273279867208504e-06, "loss": 0.1474, "step": 6869 }, { "epoch": 1.61, "learning_rate": 9.27075223169356e-06, "loss": 0.0385, "step": 6870 }, { "epoch": 1.61, "learning_rate": 9.26822464301806e-06, "loss": 0.0093, "step": 6871 }, { "epoch": 1.61, "learning_rate": 9.265697101344345e-06, "loss": 0.0205, "step": 6872 }, { "epoch": 1.61, "learning_rate": 9.263169606834768e-06, "loss": 0.0317, "step": 6873 }, { "epoch": 1.61, "learning_rate": 9.260642159651659e-06, "loss": 0.0455, "step": 6874 }, { "epoch": 1.61, "learning_rate": 9.258114759957361e-06, "loss": 0.0628, "step": 6875 }, { "epoch": 1.61, "learning_rate": 9.25558740791421e-06, "loss": 0.0063, "step": 6876 }, { "epoch": 1.61, "learning_rate": 9.253060103684534e-06, "loss": 0.005, "step": 6877 }, { "epoch": 1.61, "learning_rate": 9.250532847430663e-06, "loss": 0.0777, "step": 6878 }, { "epoch": 1.61, "learning_rate": 9.248005639314921e-06, "loss": 0.0273, "step": 6879 }, { "epoch": 1.61, "learning_rate": 9.245478479499632e-06, "loss": 0.0556, "step": 6880 }, { "epoch": 1.61, "learning_rate": 9.242951368147111e-06, "loss": 0.0154, "step": 6881 }, { "epoch": 1.62, "learning_rate": 9.24042430541968e-06, "loss": 0.0353, "step": 6882 }, { "epoch": 1.62, "learning_rate": 9.237897291479647e-06, "loss": 0.0532, "step": 6883 }, { "epoch": 1.62, "learning_rate": 9.23537032648932e-06, "loss": 0.0079, "step": 6884 }, { "epoch": 1.62, "learning_rate": 9.232843410611012e-06, "loss": 0.0636, "step": 6885 }, { "epoch": 1.62, "learning_rate": 9.23031654400702e-06, "loss": 0.0684, "step": 6886 }, { "epoch": 1.62, "learning_rate": 9.22778972683965e-06, "loss": 0.0368, "step": 6887 }, { "epoch": 1.62, "learning_rate": 9.225262959271192e-06, "loss": 0.0154, "step": 6888 }, { "epoch": 1.62, "learning_rate": 9.222736241463947e-06, "loss": 0.0219, "step": 6889 }, { "epoch": 1.62, "learning_rate": 9.220209573580197e-06, "loss": 0.0005, "step": 6890 }, { "epoch": 1.62, "learning_rate": 9.21768295578224e-06, "loss": 0.0049, "step": 6891 }, { "epoch": 1.62, "learning_rate": 9.21515638823235e-06, "loss": 0.0054, "step": 6892 }, { "epoch": 1.62, "learning_rate": 9.212629871092815e-06, "loss": 0.0111, "step": 6893 }, { "epoch": 1.62, "learning_rate": 9.210103404525912e-06, "loss": 0.0099, "step": 6894 }, { "epoch": 1.62, "learning_rate": 9.207576988693911e-06, "loss": 0.023, "step": 6895 }, { "epoch": 1.62, "learning_rate": 9.20505062375909e-06, "loss": 0.0352, "step": 6896 }, { "epoch": 1.62, "learning_rate": 9.20252430988371e-06, "loss": 0.0027, "step": 6897 }, { "epoch": 1.62, "learning_rate": 9.199998047230038e-06, "loss": 0.0135, "step": 6898 }, { "epoch": 1.62, "learning_rate": 9.197471835960338e-06, "loss": 0.022, "step": 6899 }, { "epoch": 1.62, "learning_rate": 9.194945676236863e-06, "loss": 0.0052, "step": 6900 }, { "epoch": 1.62, "learning_rate": 9.192419568221874e-06, "loss": 0.0263, "step": 6901 }, { "epoch": 1.62, "learning_rate": 9.189893512077616e-06, "loss": 0.0445, "step": 6902 }, { "epoch": 1.62, "learning_rate": 9.187367507966345e-06, "loss": 0.0053, "step": 6903 }, { "epoch": 1.62, "learning_rate": 9.184841556050297e-06, "loss": 0.0082, "step": 6904 }, { "epoch": 1.62, "learning_rate": 9.18231565649172e-06, "loss": 0.0152, "step": 6905 }, { "epoch": 1.62, "learning_rate": 9.179789809452846e-06, "loss": 0.0366, "step": 6906 }, { "epoch": 1.62, "learning_rate": 9.177264015095919e-06, "loss": 0.0654, "step": 6907 }, { "epoch": 1.62, "learning_rate": 9.174738273583156e-06, "loss": 0.0028, "step": 6908 }, { "epoch": 1.62, "learning_rate": 9.172212585076797e-06, "loss": 0.0019, "step": 6909 }, { "epoch": 1.62, "learning_rate": 9.169686949739065e-06, "loss": 0.0302, "step": 6910 }, { "epoch": 1.62, "learning_rate": 9.167161367732177e-06, "loss": 0.0279, "step": 6911 }, { "epoch": 1.62, "learning_rate": 9.164635839218354e-06, "loss": 0.0042, "step": 6912 }, { "epoch": 1.62, "learning_rate": 9.162110364359807e-06, "loss": 0.0675, "step": 6913 }, { "epoch": 1.62, "learning_rate": 9.15958494331875e-06, "loss": 0.0224, "step": 6914 }, { "epoch": 1.62, "learning_rate": 9.157059576257387e-06, "loss": 0.0142, "step": 6915 }, { "epoch": 1.62, "learning_rate": 9.154534263337927e-06, "loss": 0.0078, "step": 6916 }, { "epoch": 1.62, "learning_rate": 9.152009004722564e-06, "loss": 0.071, "step": 6917 }, { "epoch": 1.62, "learning_rate": 9.149483800573496e-06, "loss": 0.0156, "step": 6918 }, { "epoch": 1.62, "learning_rate": 9.146958651052922e-06, "loss": 0.001, "step": 6919 }, { "epoch": 1.62, "learning_rate": 9.144433556323027e-06, "loss": 0.0136, "step": 6920 }, { "epoch": 1.62, "learning_rate": 9.141908516545999e-06, "loss": 0.028, "step": 6921 }, { "epoch": 1.62, "learning_rate": 9.139383531884018e-06, "loss": 0.0031, "step": 6922 }, { "epoch": 1.62, "learning_rate": 9.136858602499268e-06, "loss": 0.0188, "step": 6923 }, { "epoch": 1.63, "learning_rate": 9.134333728553921e-06, "loss": 0.0067, "step": 6924 }, { "epoch": 1.63, "learning_rate": 9.131808910210151e-06, "loss": 0.0085, "step": 6925 }, { "epoch": 1.63, "learning_rate": 9.129284147630122e-06, "loss": 0.0367, "step": 6926 }, { "epoch": 1.63, "learning_rate": 9.126759440976005e-06, "loss": 0.0613, "step": 6927 }, { "epoch": 1.63, "learning_rate": 9.124234790409963e-06, "loss": 0.0163, "step": 6928 }, { "epoch": 1.63, "learning_rate": 9.121710196094146e-06, "loss": 0.0067, "step": 6929 }, { "epoch": 1.63, "learning_rate": 9.119185658190716e-06, "loss": 0.0269, "step": 6930 }, { "epoch": 1.63, "learning_rate": 9.116661176861818e-06, "loss": 0.0053, "step": 6931 }, { "epoch": 1.63, "learning_rate": 9.114136752269603e-06, "loss": 0.0091, "step": 6932 }, { "epoch": 1.63, "learning_rate": 9.111612384576208e-06, "loss": 0.0007, "step": 6933 }, { "epoch": 1.63, "learning_rate": 9.109088073943779e-06, "loss": 0.034, "step": 6934 }, { "epoch": 1.63, "learning_rate": 9.10656382053445e-06, "loss": 0.0716, "step": 6935 }, { "epoch": 1.63, "learning_rate": 9.104039624510353e-06, "loss": 0.0516, "step": 6936 }, { "epoch": 1.63, "learning_rate": 9.10151548603362e-06, "loss": 0.0154, "step": 6937 }, { "epoch": 1.63, "learning_rate": 9.098991405266369e-06, "loss": 0.0021, "step": 6938 }, { "epoch": 1.63, "learning_rate": 9.096467382370727e-06, "loss": 0.0114, "step": 6939 }, { "epoch": 1.63, "learning_rate": 9.093943417508806e-06, "loss": 0.0009, "step": 6940 }, { "epoch": 1.63, "learning_rate": 9.091419510842727e-06, "loss": 0.0259, "step": 6941 }, { "epoch": 1.63, "learning_rate": 9.088895662534594e-06, "loss": 0.0596, "step": 6942 }, { "epoch": 1.63, "learning_rate": 9.086371872746513e-06, "loss": 0.0231, "step": 6943 }, { "epoch": 1.63, "learning_rate": 9.083848141640593e-06, "loss": 0.0748, "step": 6944 }, { "epoch": 1.63, "learning_rate": 9.081324469378928e-06, "loss": 0.0311, "step": 6945 }, { "epoch": 1.63, "learning_rate": 9.078800856123615e-06, "loss": 0.0538, "step": 6946 }, { "epoch": 1.63, "learning_rate": 9.076277302036742e-06, "loss": 0.006, "step": 6947 }, { "epoch": 1.63, "learning_rate": 9.0737538072804e-06, "loss": 0.0047, "step": 6948 }, { "epoch": 1.63, "learning_rate": 9.07123037201667e-06, "loss": 0.0307, "step": 6949 }, { "epoch": 1.63, "learning_rate": 9.068706996407634e-06, "loss": 0.0146, "step": 6950 }, { "epoch": 1.63, "learning_rate": 9.066183680615364e-06, "loss": 0.0137, "step": 6951 }, { "epoch": 1.63, "learning_rate": 9.063660424801936e-06, "loss": 0.0037, "step": 6952 }, { "epoch": 1.63, "learning_rate": 9.061137229129417e-06, "loss": 0.0017, "step": 6953 }, { "epoch": 1.63, "learning_rate": 9.058614093759873e-06, "loss": 0.006, "step": 6954 }, { "epoch": 1.63, "learning_rate": 9.056091018855362e-06, "loss": 0.0111, "step": 6955 }, { "epoch": 1.63, "learning_rate": 9.053568004577939e-06, "loss": 0.0634, "step": 6956 }, { "epoch": 1.63, "learning_rate": 9.051045051089661e-06, "loss": 0.0174, "step": 6957 }, { "epoch": 1.63, "learning_rate": 9.048522158552575e-06, "loss": 0.0651, "step": 6958 }, { "epoch": 1.63, "learning_rate": 9.045999327128727e-06, "loss": 0.0011, "step": 6959 }, { "epoch": 1.63, "learning_rate": 9.043476556980153e-06, "loss": 0.0052, "step": 6960 }, { "epoch": 1.63, "learning_rate": 9.040953848268894e-06, "loss": 0.0437, "step": 6961 }, { "epoch": 1.63, "learning_rate": 9.038431201156987e-06, "loss": 0.0029, "step": 6962 }, { "epoch": 1.63, "learning_rate": 9.035908615806454e-06, "loss": 0.0326, "step": 6963 }, { "epoch": 1.63, "learning_rate": 9.033386092379326e-06, "loss": 0.009, "step": 6964 }, { "epoch": 1.63, "learning_rate": 9.030863631037619e-06, "loss": 0.0516, "step": 6965 }, { "epoch": 1.63, "learning_rate": 9.028341231943356e-06, "loss": 0.0449, "step": 6966 }, { "epoch": 1.64, "learning_rate": 9.025818895258543e-06, "loss": 0.0278, "step": 6967 }, { "epoch": 1.64, "learning_rate": 9.023296621145194e-06, "loss": 0.0464, "step": 6968 }, { "epoch": 1.64, "learning_rate": 9.020774409765315e-06, "loss": 0.0079, "step": 6969 }, { "epoch": 1.64, "learning_rate": 9.018252261280902e-06, "loss": 0.0049, "step": 6970 }, { "epoch": 1.64, "learning_rate": 9.015730175853958e-06, "loss": 0.1167, "step": 6971 }, { "epoch": 1.64, "learning_rate": 9.013208153646471e-06, "loss": 0.0439, "step": 6972 }, { "epoch": 1.64, "learning_rate": 9.010686194820437e-06, "loss": 0.0347, "step": 6973 }, { "epoch": 1.64, "learning_rate": 9.008164299537833e-06, "loss": 0.0055, "step": 6974 }, { "epoch": 1.64, "learning_rate": 9.005642467960645e-06, "loss": 0.0509, "step": 6975 }, { "epoch": 1.64, "learning_rate": 9.003120700250847e-06, "loss": 0.0024, "step": 6976 }, { "epoch": 1.64, "learning_rate": 9.00059899657041e-06, "loss": 0.0183, "step": 6977 }, { "epoch": 1.64, "learning_rate": 8.998077357081311e-06, "loss": 0.0081, "step": 6978 }, { "epoch": 1.64, "learning_rate": 8.995555781945503e-06, "loss": 0.0925, "step": 6979 }, { "epoch": 1.64, "learning_rate": 8.993034271324958e-06, "loss": 0.0484, "step": 6980 }, { "epoch": 1.64, "learning_rate": 8.99051282538162e-06, "loss": 0.0174, "step": 6981 }, { "epoch": 1.64, "learning_rate": 8.98799144427745e-06, "loss": 0.0009, "step": 6982 }, { "epoch": 1.64, "learning_rate": 8.985470128174391e-06, "loss": 0.0399, "step": 6983 }, { "epoch": 1.64, "learning_rate": 8.982948877234393e-06, "loss": 0.0131, "step": 6984 }, { "epoch": 1.64, "learning_rate": 8.980427691619386e-06, "loss": 0.0399, "step": 6985 }, { "epoch": 1.64, "learning_rate": 8.977906571491312e-06, "loss": 0.0478, "step": 6986 }, { "epoch": 1.64, "learning_rate": 8.975385517012102e-06, "loss": 0.0127, "step": 6987 }, { "epoch": 1.64, "learning_rate": 8.972864528343679e-06, "loss": 0.0399, "step": 6988 }, { "epoch": 1.64, "learning_rate": 8.97034360564797e-06, "loss": 0.0074, "step": 6989 }, { "epoch": 1.64, "learning_rate": 8.967822749086888e-06, "loss": 0.1201, "step": 6990 }, { "epoch": 1.64, "learning_rate": 8.965301958822353e-06, "loss": 0.0414, "step": 6991 }, { "epoch": 1.64, "learning_rate": 8.96278123501627e-06, "loss": 0.0272, "step": 6992 }, { "epoch": 1.64, "learning_rate": 8.960260577830549e-06, "loss": 0.0294, "step": 6993 }, { "epoch": 1.64, "learning_rate": 8.957739987427087e-06, "loss": 0.025, "step": 6994 }, { "epoch": 1.64, "learning_rate": 8.95521946396778e-06, "loss": 0.0625, "step": 6995 }, { "epoch": 1.64, "learning_rate": 8.95269900761453e-06, "loss": 0.0543, "step": 6996 }, { "epoch": 1.64, "learning_rate": 8.950178618529214e-06, "loss": 0.0533, "step": 6997 }, { "epoch": 1.64, "learning_rate": 8.947658296873727e-06, "loss": 0.0013, "step": 6998 }, { "epoch": 1.64, "learning_rate": 8.945138042809937e-06, "loss": 0.0077, "step": 6999 }, { "epoch": 1.64, "learning_rate": 8.942617856499731e-06, "loss": 0.0522, "step": 7000 }, { "epoch": 1.64, "learning_rate": 8.940097738104969e-06, "loss": 0.0074, "step": 7001 }, { "epoch": 1.64, "learning_rate": 8.937577687787529e-06, "loss": 0.0463, "step": 7002 }, { "epoch": 1.64, "learning_rate": 8.935057705709263e-06, "loss": 0.0036, "step": 7003 }, { "epoch": 1.64, "learning_rate": 8.932537792032034e-06, "loss": 0.1198, "step": 7004 }, { "epoch": 1.64, "learning_rate": 8.930017946917699e-06, "loss": 0.0743, "step": 7005 }, { "epoch": 1.64, "learning_rate": 8.9274981705281e-06, "loss": 0.0023, "step": 7006 }, { "epoch": 1.64, "learning_rate": 8.924978463025087e-06, "loss": 0.048, "step": 7007 }, { "epoch": 1.64, "learning_rate": 8.922458824570497e-06, "loss": 0.0209, "step": 7008 }, { "epoch": 1.64, "learning_rate": 8.919939255326168e-06, "loss": 0.0507, "step": 7009 }, { "epoch": 1.65, "learning_rate": 8.917419755453931e-06, "loss": 0.0387, "step": 7010 }, { "epoch": 1.65, "learning_rate": 8.914900325115613e-06, "loss": 0.0058, "step": 7011 }, { "epoch": 1.65, "learning_rate": 8.912380964473034e-06, "loss": 0.0061, "step": 7012 }, { "epoch": 1.65, "learning_rate": 8.909861673688013e-06, "loss": 0.0321, "step": 7013 }, { "epoch": 1.65, "learning_rate": 8.907342452922371e-06, "loss": 0.0448, "step": 7014 }, { "epoch": 1.65, "learning_rate": 8.904823302337903e-06, "loss": 0.0401, "step": 7015 }, { "epoch": 1.65, "learning_rate": 8.90230422209643e-06, "loss": 0.0105, "step": 7016 }, { "epoch": 1.65, "learning_rate": 8.899785212359737e-06, "loss": 0.0091, "step": 7017 }, { "epoch": 1.65, "learning_rate": 8.89726627328963e-06, "loss": 0.0044, "step": 7018 }, { "epoch": 1.65, "learning_rate": 8.894747405047893e-06, "loss": 0.0092, "step": 7019 }, { "epoch": 1.65, "learning_rate": 8.892228607796315e-06, "loss": 0.0411, "step": 7020 }, { "epoch": 1.65, "learning_rate": 8.889709881696682e-06, "loss": 0.0022, "step": 7021 }, { "epoch": 1.65, "learning_rate": 8.887191226910764e-06, "loss": 0.0486, "step": 7022 }, { "epoch": 1.65, "learning_rate": 8.884672643600341e-06, "loss": 0.0503, "step": 7023 }, { "epoch": 1.65, "learning_rate": 8.882154131927172e-06, "loss": 0.0339, "step": 7024 }, { "epoch": 1.65, "learning_rate": 8.879635692053031e-06, "loss": 0.0042, "step": 7025 }, { "epoch": 1.65, "learning_rate": 8.87711732413967e-06, "loss": 0.0072, "step": 7026 }, { "epoch": 1.65, "learning_rate": 8.874599028348845e-06, "loss": 0.0018, "step": 7027 }, { "epoch": 1.65, "learning_rate": 8.872080804842305e-06, "loss": 0.0312, "step": 7028 }, { "epoch": 1.65, "learning_rate": 8.869562653781792e-06, "loss": 0.0044, "step": 7029 }, { "epoch": 1.65, "learning_rate": 8.867044575329057e-06, "loss": 0.0188, "step": 7030 }, { "epoch": 1.65, "learning_rate": 8.864526569645824e-06, "loss": 0.0139, "step": 7031 }, { "epoch": 1.65, "learning_rate": 8.862008636893834e-06, "loss": 0.0253, "step": 7032 }, { "epoch": 1.65, "learning_rate": 8.859490777234801e-06, "loss": 0.0029, "step": 7033 }, { "epoch": 1.65, "learning_rate": 8.856972990830461e-06, "loss": 0.0036, "step": 7034 }, { "epoch": 1.65, "learning_rate": 8.854455277842517e-06, "loss": 0.0092, "step": 7035 }, { "epoch": 1.65, "learning_rate": 8.851937638432694e-06, "loss": 0.0082, "step": 7036 }, { "epoch": 1.65, "learning_rate": 8.849420072762688e-06, "loss": 0.0026, "step": 7037 }, { "epoch": 1.65, "learning_rate": 8.846902580994207e-06, "loss": 0.0158, "step": 7038 }, { "epoch": 1.65, "learning_rate": 8.844385163288955e-06, "loss": 0.0283, "step": 7039 }, { "epoch": 1.65, "learning_rate": 8.841867819808614e-06, "loss": 0.002, "step": 7040 }, { "epoch": 1.65, "learning_rate": 8.839350550714881e-06, "loss": 0.0272, "step": 7041 }, { "epoch": 1.65, "learning_rate": 8.836833356169435e-06, "loss": 0.0104, "step": 7042 }, { "epoch": 1.65, "learning_rate": 8.834316236333957e-06, "loss": 0.063, "step": 7043 }, { "epoch": 1.65, "learning_rate": 8.831799191370121e-06, "loss": 0.0532, "step": 7044 }, { "epoch": 1.65, "learning_rate": 8.829282221439596e-06, "loss": 0.0419, "step": 7045 }, { "epoch": 1.65, "learning_rate": 8.826765326704044e-06, "loss": 0.084, "step": 7046 }, { "epoch": 1.65, "learning_rate": 8.824248507325126e-06, "loss": 0.0435, "step": 7047 }, { "epoch": 1.65, "learning_rate": 8.821731763464504e-06, "loss": 0.0816, "step": 7048 }, { "epoch": 1.65, "learning_rate": 8.819215095283815e-06, "loss": 0.0491, "step": 7049 }, { "epoch": 1.65, "learning_rate": 8.816698502944718e-06, "loss": 0.027, "step": 7050 }, { "epoch": 1.65, "learning_rate": 8.81418198660884e-06, "loss": 0.008, "step": 7051 }, { "epoch": 1.66, "learning_rate": 8.811665546437829e-06, "loss": 0.0189, "step": 7052 }, { "epoch": 1.66, "learning_rate": 8.809149182593303e-06, "loss": 0.0378, "step": 7053 }, { "epoch": 1.66, "learning_rate": 8.806632895236895e-06, "loss": 0.0033, "step": 7054 }, { "epoch": 1.66, "learning_rate": 8.804116684530223e-06, "loss": 0.0171, "step": 7055 }, { "epoch": 1.66, "learning_rate": 8.801600550634905e-06, "loss": 0.0404, "step": 7056 }, { "epoch": 1.66, "learning_rate": 8.799084493712553e-06, "loss": 0.0101, "step": 7057 }, { "epoch": 1.66, "learning_rate": 8.79656851392477e-06, "loss": 0.0385, "step": 7058 }, { "epoch": 1.66, "learning_rate": 8.794052611433157e-06, "loss": 0.0226, "step": 7059 }, { "epoch": 1.66, "learning_rate": 8.791536786399309e-06, "loss": 0.0149, "step": 7060 }, { "epoch": 1.66, "learning_rate": 8.789021038984821e-06, "loss": 0.0268, "step": 7061 }, { "epoch": 1.66, "learning_rate": 8.786505369351273e-06, "loss": 0.0028, "step": 7062 }, { "epoch": 1.66, "learning_rate": 8.783989777660248e-06, "loss": 0.0315, "step": 7063 }, { "epoch": 1.66, "learning_rate": 8.78147426407333e-06, "loss": 0.0382, "step": 7064 }, { "epoch": 1.66, "learning_rate": 8.778958828752077e-06, "loss": 0.0098, "step": 7065 }, { "epoch": 1.66, "learning_rate": 8.776443471858066e-06, "loss": 0.0095, "step": 7066 }, { "epoch": 1.66, "learning_rate": 8.773928193552848e-06, "loss": 0.0432, "step": 7067 }, { "epoch": 1.66, "learning_rate": 8.77141299399799e-06, "loss": 0.0247, "step": 7068 }, { "epoch": 1.66, "learning_rate": 8.76889787335503e-06, "loss": 0.1096, "step": 7069 }, { "epoch": 1.66, "learning_rate": 8.766382831785528e-06, "loss": 0.0107, "step": 7070 }, { "epoch": 1.66, "learning_rate": 8.763867869451008e-06, "loss": 0.0067, "step": 7071 }, { "epoch": 1.66, "learning_rate": 8.761352986513018e-06, "loss": 0.0244, "step": 7072 }, { "epoch": 1.66, "learning_rate": 8.758838183133088e-06, "loss": 0.0115, "step": 7073 }, { "epoch": 1.66, "learning_rate": 8.756323459472735e-06, "loss": 0.018, "step": 7074 }, { "epoch": 1.66, "learning_rate": 8.75380881569349e-06, "loss": 0.0148, "step": 7075 }, { "epoch": 1.66, "learning_rate": 8.75129425195686e-06, "loss": 0.0277, "step": 7076 }, { "epoch": 1.66, "learning_rate": 8.748779768424359e-06, "loss": 0.0227, "step": 7077 }, { "epoch": 1.66, "learning_rate": 8.746265365257487e-06, "loss": 0.0415, "step": 7078 }, { "epoch": 1.66, "learning_rate": 8.74375104261775e-06, "loss": 0.0637, "step": 7079 }, { "epoch": 1.66, "learning_rate": 8.741236800666637e-06, "loss": 0.0598, "step": 7080 }, { "epoch": 1.66, "learning_rate": 8.738722639565637e-06, "loss": 0.0692, "step": 7081 }, { "epoch": 1.66, "learning_rate": 8.736208559476244e-06, "loss": 0.1015, "step": 7082 }, { "epoch": 1.66, "learning_rate": 8.733694560559924e-06, "loss": 0.0322, "step": 7083 }, { "epoch": 1.66, "learning_rate": 8.731180642978162e-06, "loss": 0.039, "step": 7084 }, { "epoch": 1.66, "learning_rate": 8.728666806892414e-06, "loss": 0.0313, "step": 7085 }, { "epoch": 1.66, "learning_rate": 8.726153052464156e-06, "loss": 0.0045, "step": 7086 }, { "epoch": 1.66, "learning_rate": 8.723639379854835e-06, "loss": 0.007, "step": 7087 }, { "epoch": 1.66, "learning_rate": 8.721125789225914e-06, "loss": 0.0019, "step": 7088 }, { "epoch": 1.66, "learning_rate": 8.718612280738829e-06, "loss": 0.0511, "step": 7089 }, { "epoch": 1.66, "learning_rate": 8.716098854555029e-06, "loss": 0.0082, "step": 7090 }, { "epoch": 1.66, "learning_rate": 8.713585510835953e-06, "loss": 0.0083, "step": 7091 }, { "epoch": 1.66, "learning_rate": 8.711072249743028e-06, "loss": 0.0567, "step": 7092 }, { "epoch": 1.66, "learning_rate": 8.708559071437682e-06, "loss": 0.0173, "step": 7093 }, { "epoch": 1.66, "learning_rate": 8.706045976081336e-06, "loss": 0.0343, "step": 7094 }, { "epoch": 1.67, "learning_rate": 8.703532963835407e-06, "loss": 0.0262, "step": 7095 }, { "epoch": 1.67, "learning_rate": 8.701020034861303e-06, "loss": 0.0694, "step": 7096 }, { "epoch": 1.67, "learning_rate": 8.69850718932043e-06, "loss": 0.0151, "step": 7097 }, { "epoch": 1.67, "learning_rate": 8.695994427374187e-06, "loss": 0.0548, "step": 7098 }, { "epoch": 1.67, "learning_rate": 8.693481749183964e-06, "loss": 0.0377, "step": 7099 }, { "epoch": 1.67, "learning_rate": 8.690969154911163e-06, "loss": 0.0062, "step": 7100 }, { "epoch": 1.67, "learning_rate": 8.688456644717149e-06, "loss": 0.0234, "step": 7101 }, { "epoch": 1.67, "learning_rate": 8.68594421876332e-06, "loss": 0.0552, "step": 7102 }, { "epoch": 1.67, "learning_rate": 8.683431877211029e-06, "loss": 0.0415, "step": 7103 }, { "epoch": 1.67, "learning_rate": 8.68091962022166e-06, "loss": 0.0188, "step": 7104 }, { "epoch": 1.67, "learning_rate": 8.678407447956561e-06, "loss": 0.039, "step": 7105 }, { "epoch": 1.67, "learning_rate": 8.675895360577098e-06, "loss": 0.0047, "step": 7106 }, { "epoch": 1.67, "learning_rate": 8.67338335824462e-06, "loss": 0.0019, "step": 7107 }, { "epoch": 1.67, "learning_rate": 8.67087144112047e-06, "loss": 0.0149, "step": 7108 }, { "epoch": 1.67, "learning_rate": 8.66835960936599e-06, "loss": 0.0343, "step": 7109 }, { "epoch": 1.67, "learning_rate": 8.66584786314251e-06, "loss": 0.0138, "step": 7110 }, { "epoch": 1.67, "learning_rate": 8.663336202611368e-06, "loss": 0.0149, "step": 7111 }, { "epoch": 1.67, "learning_rate": 8.660824627933877e-06, "loss": 0.0276, "step": 7112 }, { "epoch": 1.67, "learning_rate": 8.658313139271363e-06, "loss": 0.0226, "step": 7113 }, { "epoch": 1.67, "learning_rate": 8.655801736785133e-06, "loss": 0.0273, "step": 7114 }, { "epoch": 1.67, "learning_rate": 8.653290420636494e-06, "loss": 0.0062, "step": 7115 }, { "epoch": 1.67, "learning_rate": 8.650779190986757e-06, "loss": 0.0007, "step": 7116 }, { "epoch": 1.67, "learning_rate": 8.648268047997202e-06, "loss": 0.1596, "step": 7117 }, { "epoch": 1.67, "learning_rate": 8.645756991829136e-06, "loss": 0.0081, "step": 7118 }, { "epoch": 1.67, "learning_rate": 8.643246022643828e-06, "loss": 0.0266, "step": 7119 }, { "epoch": 1.67, "learning_rate": 8.640735140602571e-06, "loss": 0.0319, "step": 7120 }, { "epoch": 1.67, "learning_rate": 8.638224345866624e-06, "loss": 0.0363, "step": 7121 }, { "epoch": 1.67, "learning_rate": 8.63571363859727e-06, "loss": 0.0759, "step": 7122 }, { "epoch": 1.67, "learning_rate": 8.633203018955758e-06, "loss": 0.0228, "step": 7123 }, { "epoch": 1.67, "learning_rate": 8.630692487103352e-06, "loss": 0.0508, "step": 7124 }, { "epoch": 1.67, "learning_rate": 8.628182043201303e-06, "loss": 0.0493, "step": 7125 }, { "epoch": 1.67, "learning_rate": 8.625671687410854e-06, "loss": 0.0197, "step": 7126 }, { "epoch": 1.67, "learning_rate": 8.623161419893247e-06, "loss": 0.0669, "step": 7127 }, { "epoch": 1.67, "learning_rate": 8.620651240809711e-06, "loss": 0.0081, "step": 7128 }, { "epoch": 1.67, "learning_rate": 8.618141150321482e-06, "loss": 0.074, "step": 7129 }, { "epoch": 1.67, "learning_rate": 8.615631148589775e-06, "loss": 0.0281, "step": 7130 }, { "epoch": 1.67, "learning_rate": 8.613121235775812e-06, "loss": 0.0458, "step": 7131 }, { "epoch": 1.67, "learning_rate": 8.610611412040802e-06, "loss": 0.0334, "step": 7132 }, { "epoch": 1.67, "learning_rate": 8.608101677545946e-06, "loss": 0.0275, "step": 7133 }, { "epoch": 1.67, "learning_rate": 8.605592032452457e-06, "loss": 0.0228, "step": 7134 }, { "epoch": 1.67, "learning_rate": 8.603082476921514e-06, "loss": 0.0168, "step": 7135 }, { "epoch": 1.67, "learning_rate": 8.600573011114319e-06, "loss": 0.0085, "step": 7136 }, { "epoch": 1.68, "learning_rate": 8.598063635192042e-06, "loss": 0.0026, "step": 7137 }, { "epoch": 1.68, "learning_rate": 8.59555434931587e-06, "loss": 0.0259, "step": 7138 }, { "epoch": 1.68, "learning_rate": 8.593045153646964e-06, "loss": 0.0681, "step": 7139 }, { "epoch": 1.68, "learning_rate": 8.590536048346495e-06, "loss": 0.0127, "step": 7140 }, { "epoch": 1.68, "learning_rate": 8.588027033575624e-06, "loss": 0.0143, "step": 7141 }, { "epoch": 1.68, "learning_rate": 8.5855181094955e-06, "loss": 0.0921, "step": 7142 }, { "epoch": 1.68, "learning_rate": 8.583009276267277e-06, "loss": 0.0039, "step": 7143 }, { "epoch": 1.68, "learning_rate": 8.58050053405209e-06, "loss": 0.0022, "step": 7144 }, { "epoch": 1.68, "learning_rate": 8.577991883011079e-06, "loss": 0.0587, "step": 7145 }, { "epoch": 1.68, "learning_rate": 8.57548332330537e-06, "loss": 0.0446, "step": 7146 }, { "epoch": 1.68, "learning_rate": 8.572974855096094e-06, "loss": 0.0041, "step": 7147 }, { "epoch": 1.68, "learning_rate": 8.570466478544364e-06, "loss": 0.0068, "step": 7148 }, { "epoch": 1.68, "learning_rate": 8.567958193811293e-06, "loss": 0.0066, "step": 7149 }, { "epoch": 1.68, "learning_rate": 8.565450001057991e-06, "loss": 0.0509, "step": 7150 }, { "epoch": 1.68, "learning_rate": 8.562941900445553e-06, "loss": 0.0307, "step": 7151 }, { "epoch": 1.68, "learning_rate": 8.560433892135084e-06, "loss": 0.01, "step": 7152 }, { "epoch": 1.68, "learning_rate": 8.55792597628766e-06, "loss": 0.0271, "step": 7153 }, { "epoch": 1.68, "learning_rate": 8.555418153064377e-06, "loss": 0.0309, "step": 7154 }, { "epoch": 1.68, "learning_rate": 8.552910422626298e-06, "loss": 0.058, "step": 7155 }, { "epoch": 1.68, "learning_rate": 8.550402785134508e-06, "loss": 0.004, "step": 7156 }, { "epoch": 1.68, "learning_rate": 8.547895240750058e-06, "loss": 0.0169, "step": 7157 }, { "epoch": 1.68, "learning_rate": 8.545387789634018e-06, "loss": 0.0234, "step": 7158 }, { "epoch": 1.68, "learning_rate": 8.54288043194744e-06, "loss": 0.0609, "step": 7159 }, { "epoch": 1.68, "learning_rate": 8.540373167851364e-06, "loss": 0.002, "step": 7160 }, { "epoch": 1.68, "learning_rate": 8.53786599750684e-06, "loss": 0.0221, "step": 7161 }, { "epoch": 1.68, "learning_rate": 8.535358921074897e-06, "loss": 0.0068, "step": 7162 }, { "epoch": 1.68, "learning_rate": 8.532851938716566e-06, "loss": 0.0129, "step": 7163 }, { "epoch": 1.68, "learning_rate": 8.53034505059287e-06, "loss": 0.0349, "step": 7164 }, { "epoch": 1.68, "learning_rate": 8.527838256864827e-06, "loss": 0.056, "step": 7165 }, { "epoch": 1.68, "learning_rate": 8.525331557693444e-06, "loss": 0.0532, "step": 7166 }, { "epoch": 1.68, "learning_rate": 8.522824953239728e-06, "loss": 0.003, "step": 7167 }, { "epoch": 1.68, "learning_rate": 8.520318443664682e-06, "loss": 0.0754, "step": 7168 }, { "epoch": 1.68, "learning_rate": 8.51781202912929e-06, "loss": 0.013, "step": 7169 }, { "epoch": 1.68, "learning_rate": 8.51530570979455e-06, "loss": 0.0155, "step": 7170 }, { "epoch": 1.68, "learning_rate": 8.512799485821429e-06, "loss": 0.0184, "step": 7171 }, { "epoch": 1.68, "learning_rate": 8.510293357370913e-06, "loss": 0.0301, "step": 7172 }, { "epoch": 1.68, "learning_rate": 8.50778732460396e-06, "loss": 0.0384, "step": 7173 }, { "epoch": 1.68, "learning_rate": 8.505281387681543e-06, "loss": 0.0662, "step": 7174 }, { "epoch": 1.68, "learning_rate": 8.502775546764607e-06, "loss": 0.0285, "step": 7175 }, { "epoch": 1.68, "learning_rate": 8.500269802014107e-06, "loss": 0.0102, "step": 7176 }, { "epoch": 1.68, "learning_rate": 8.49776415359099e-06, "loss": 0.036, "step": 7177 }, { "epoch": 1.68, "learning_rate": 8.495258601656186e-06, "loss": 0.0409, "step": 7178 }, { "epoch": 1.68, "learning_rate": 8.492753146370631e-06, "loss": 0.0197, "step": 7179 }, { "epoch": 1.69, "learning_rate": 8.490247787895249e-06, "loss": 0.04, "step": 7180 }, { "epoch": 1.69, "learning_rate": 8.487742526390958e-06, "loss": 0.0033, "step": 7181 }, { "epoch": 1.69, "learning_rate": 8.485237362018669e-06, "loss": 0.0363, "step": 7182 }, { "epoch": 1.69, "learning_rate": 8.482732294939292e-06, "loss": 0.0012, "step": 7183 }, { "epoch": 1.69, "learning_rate": 8.480227325313723e-06, "loss": 0.0217, "step": 7184 }, { "epoch": 1.69, "learning_rate": 8.477722453302859e-06, "loss": 0.1106, "step": 7185 }, { "epoch": 1.69, "learning_rate": 8.475217679067586e-06, "loss": 0.0716, "step": 7186 }, { "epoch": 1.69, "learning_rate": 8.472713002768781e-06, "loss": 0.0224, "step": 7187 }, { "epoch": 1.69, "learning_rate": 8.47020842456733e-06, "loss": 0.0418, "step": 7188 }, { "epoch": 1.69, "learning_rate": 8.467703944624089e-06, "loss": 0.0787, "step": 7189 }, { "epoch": 1.69, "learning_rate": 8.465199563099931e-06, "loss": 0.0162, "step": 7190 }, { "epoch": 1.69, "learning_rate": 8.4626952801557e-06, "loss": 0.0131, "step": 7191 }, { "epoch": 1.69, "learning_rate": 8.460191095952254e-06, "loss": 0.0221, "step": 7192 }, { "epoch": 1.69, "learning_rate": 8.457687010650438e-06, "loss": 0.033, "step": 7193 }, { "epoch": 1.69, "learning_rate": 8.455183024411085e-06, "loss": 0.0559, "step": 7194 }, { "epoch": 1.69, "learning_rate": 8.452679137395026e-06, "loss": 0.0049, "step": 7195 }, { "epoch": 1.69, "learning_rate": 8.450175349763083e-06, "loss": 0.0254, "step": 7196 }, { "epoch": 1.69, "learning_rate": 8.44767166167608e-06, "loss": 0.0158, "step": 7197 }, { "epoch": 1.69, "learning_rate": 8.44516807329482e-06, "loss": 0.0519, "step": 7198 }, { "epoch": 1.69, "learning_rate": 8.442664584780116e-06, "loss": 0.0082, "step": 7199 }, { "epoch": 1.69, "learning_rate": 8.440161196292763e-06, "loss": 0.0047, "step": 7200 }, { "epoch": 1.69, "learning_rate": 8.437657907993549e-06, "loss": 0.0027, "step": 7201 }, { "epoch": 1.69, "learning_rate": 8.435154720043268e-06, "loss": 0.0499, "step": 7202 }, { "epoch": 1.69, "learning_rate": 8.432651632602695e-06, "loss": 0.0154, "step": 7203 }, { "epoch": 1.69, "learning_rate": 8.430148645832602e-06, "loss": 0.014, "step": 7204 }, { "epoch": 1.69, "learning_rate": 8.427645759893754e-06, "loss": 0.0445, "step": 7205 }, { "epoch": 1.69, "learning_rate": 8.42514297494692e-06, "loss": 0.0361, "step": 7206 }, { "epoch": 1.69, "learning_rate": 8.422640291152838e-06, "loss": 0.0319, "step": 7207 }, { "epoch": 1.69, "learning_rate": 8.42013770867227e-06, "loss": 0.0075, "step": 7208 }, { "epoch": 1.69, "learning_rate": 8.417635227665947e-06, "loss": 0.015, "step": 7209 }, { "epoch": 1.69, "learning_rate": 8.415132848294606e-06, "loss": 0.0205, "step": 7210 }, { "epoch": 1.69, "learning_rate": 8.412630570718975e-06, "loss": 0.0188, "step": 7211 }, { "epoch": 1.69, "learning_rate": 8.410128395099773e-06, "loss": 0.0249, "step": 7212 }, { "epoch": 1.69, "learning_rate": 8.407626321597717e-06, "loss": 0.0052, "step": 7213 }, { "epoch": 1.69, "learning_rate": 8.405124350373511e-06, "loss": 0.0108, "step": 7214 }, { "epoch": 1.69, "learning_rate": 8.402622481587862e-06, "loss": 0.0522, "step": 7215 }, { "epoch": 1.69, "learning_rate": 8.400120715401455e-06, "loss": 0.014, "step": 7216 }, { "epoch": 1.69, "learning_rate": 8.397619051974988e-06, "loss": 0.0769, "step": 7217 }, { "epoch": 1.69, "learning_rate": 8.395117491469135e-06, "loss": 0.0021, "step": 7218 }, { "epoch": 1.69, "learning_rate": 8.392616034044574e-06, "loss": 0.011, "step": 7219 }, { "epoch": 1.69, "learning_rate": 8.390114679861974e-06, "loss": 0.0017, "step": 7220 }, { "epoch": 1.69, "learning_rate": 8.387613429081993e-06, "loss": 0.0265, "step": 7221 }, { "epoch": 1.69, "learning_rate": 8.38511228186529e-06, "loss": 0.0069, "step": 7222 }, { "epoch": 1.7, "learning_rate": 8.382611238372512e-06, "loss": 0.0487, "step": 7223 }, { "epoch": 1.7, "learning_rate": 8.3801102987643e-06, "loss": 0.0243, "step": 7224 }, { "epoch": 1.7, "learning_rate": 8.377609463201284e-06, "loss": 0.0833, "step": 7225 }, { "epoch": 1.7, "learning_rate": 8.375108731844099e-06, "loss": 0.0042, "step": 7226 }, { "epoch": 1.7, "learning_rate": 8.372608104853365e-06, "loss": 0.0445, "step": 7227 }, { "epoch": 1.7, "learning_rate": 8.370107582389697e-06, "loss": 0.0367, "step": 7228 }, { "epoch": 1.7, "learning_rate": 8.367607164613703e-06, "loss": 0.0388, "step": 7229 }, { "epoch": 1.7, "learning_rate": 8.365106851685982e-06, "loss": 0.014, "step": 7230 }, { "epoch": 1.7, "learning_rate": 8.362606643767132e-06, "loss": 0.0061, "step": 7231 }, { "epoch": 1.7, "learning_rate": 8.360106541017737e-06, "loss": 0.0062, "step": 7232 }, { "epoch": 1.7, "learning_rate": 8.357606543598382e-06, "loss": 0.0656, "step": 7233 }, { "epoch": 1.7, "learning_rate": 8.355106651669638e-06, "loss": 0.0188, "step": 7234 }, { "epoch": 1.7, "learning_rate": 8.352606865392073e-06, "loss": 0.0024, "step": 7235 }, { "epoch": 1.7, "learning_rate": 8.350107184926251e-06, "loss": 0.0025, "step": 7236 }, { "epoch": 1.7, "learning_rate": 8.347607610432724e-06, "loss": 0.09, "step": 7237 }, { "epoch": 1.7, "learning_rate": 8.34510814207204e-06, "loss": 0.0175, "step": 7238 }, { "epoch": 1.7, "learning_rate": 8.342608780004735e-06, "loss": 0.0429, "step": 7239 }, { "epoch": 1.7, "learning_rate": 8.340109524391348e-06, "loss": 0.0358, "step": 7240 }, { "epoch": 1.7, "learning_rate": 8.337610375392404e-06, "loss": 0.0435, "step": 7241 }, { "epoch": 1.7, "learning_rate": 8.335111333168422e-06, "loss": 0.0986, "step": 7242 }, { "epoch": 1.7, "learning_rate": 8.332612397879912e-06, "loss": 0.0005, "step": 7243 }, { "epoch": 1.7, "learning_rate": 8.330113569687386e-06, "loss": 0.0036, "step": 7244 }, { "epoch": 1.7, "learning_rate": 8.327614848751343e-06, "loss": 0.0234, "step": 7245 }, { "epoch": 1.7, "learning_rate": 8.32511623523227e-06, "loss": 0.0051, "step": 7246 }, { "epoch": 1.7, "learning_rate": 8.32261772929066e-06, "loss": 0.0309, "step": 7247 }, { "epoch": 1.7, "learning_rate": 8.320119331086984e-06, "loss": 0.0416, "step": 7248 }, { "epoch": 1.7, "learning_rate": 8.31762104078172e-06, "loss": 0.0152, "step": 7249 }, { "epoch": 1.7, "learning_rate": 8.315122858535327e-06, "loss": 0.0401, "step": 7250 }, { "epoch": 1.7, "learning_rate": 8.312624784508268e-06, "loss": 0.0358, "step": 7251 }, { "epoch": 1.7, "learning_rate": 8.310126818860987e-06, "loss": 0.037, "step": 7252 }, { "epoch": 1.7, "learning_rate": 8.307628961753932e-06, "loss": 0.0097, "step": 7253 }, { "epoch": 1.7, "learning_rate": 8.305131213347544e-06, "loss": 0.0821, "step": 7254 }, { "epoch": 1.7, "learning_rate": 8.302633573802246e-06, "loss": 0.0045, "step": 7255 }, { "epoch": 1.7, "learning_rate": 8.300136043278466e-06, "loss": 0.0176, "step": 7256 }, { "epoch": 1.7, "learning_rate": 8.297638621936615e-06, "loss": 0.1188, "step": 7257 }, { "epoch": 1.7, "learning_rate": 8.295141309937108e-06, "loss": 0.0513, "step": 7258 }, { "epoch": 1.7, "learning_rate": 8.292644107440337e-06, "loss": 0.0022, "step": 7259 }, { "epoch": 1.7, "learning_rate": 8.29014701460671e-06, "loss": 0.0253, "step": 7260 }, { "epoch": 1.7, "learning_rate": 8.287650031596601e-06, "loss": 0.0286, "step": 7261 }, { "epoch": 1.7, "learning_rate": 8.2851531585704e-06, "loss": 0.0198, "step": 7262 }, { "epoch": 1.7, "learning_rate": 8.282656395688483e-06, "loss": 0.0021, "step": 7263 }, { "epoch": 1.7, "learning_rate": 8.280159743111207e-06, "loss": 0.0183, "step": 7264 }, { "epoch": 1.71, "learning_rate": 8.277663200998939e-06, "loss": 0.0548, "step": 7265 }, { "epoch": 1.71, "learning_rate": 8.275166769512028e-06, "loss": 0.0067, "step": 7266 }, { "epoch": 1.71, "learning_rate": 8.272670448810821e-06, "loss": 0.0053, "step": 7267 }, { "epoch": 1.71, "learning_rate": 8.270174239055654e-06, "loss": 0.0277, "step": 7268 }, { "epoch": 1.71, "learning_rate": 8.267678140406858e-06, "loss": 0.0047, "step": 7269 }, { "epoch": 1.71, "learning_rate": 8.265182153024763e-06, "loss": 0.0173, "step": 7270 }, { "epoch": 1.71, "learning_rate": 8.262686277069678e-06, "loss": 0.0179, "step": 7271 }, { "epoch": 1.71, "learning_rate": 8.260190512701918e-06, "loss": 0.0036, "step": 7272 }, { "epoch": 1.71, "learning_rate": 8.25769486008178e-06, "loss": 0.0365, "step": 7273 }, { "epoch": 1.71, "learning_rate": 8.255199319369565e-06, "loss": 0.0026, "step": 7274 }, { "epoch": 1.71, "learning_rate": 8.252703890725558e-06, "loss": 0.0027, "step": 7275 }, { "epoch": 1.71, "learning_rate": 8.25020857431004e-06, "loss": 0.0566, "step": 7276 }, { "epoch": 1.71, "learning_rate": 8.247713370283285e-06, "loss": 0.043, "step": 7277 }, { "epoch": 1.71, "learning_rate": 8.245218278805558e-06, "loss": 0.0102, "step": 7278 }, { "epoch": 1.71, "learning_rate": 8.242723300037123e-06, "loss": 0.0165, "step": 7279 }, { "epoch": 1.71, "learning_rate": 8.240228434138227e-06, "loss": 0.0016, "step": 7280 }, { "epoch": 1.71, "learning_rate": 8.23773368126912e-06, "loss": 0.0325, "step": 7281 }, { "epoch": 1.71, "learning_rate": 8.235239041590033e-06, "loss": 0.0416, "step": 7282 }, { "epoch": 1.71, "learning_rate": 8.232744515261201e-06, "loss": 0.0353, "step": 7283 }, { "epoch": 1.71, "learning_rate": 8.230250102442844e-06, "loss": 0.0852, "step": 7284 }, { "epoch": 1.71, "learning_rate": 8.227755803295181e-06, "loss": 0.0034, "step": 7285 }, { "epoch": 1.71, "learning_rate": 8.225261617978415e-06, "loss": 0.0212, "step": 7286 }, { "epoch": 1.71, "learning_rate": 8.222767546652752e-06, "loss": 0.0051, "step": 7287 }, { "epoch": 1.71, "learning_rate": 8.220273589478386e-06, "loss": 0.0087, "step": 7288 }, { "epoch": 1.71, "learning_rate": 8.217779746615498e-06, "loss": 0.0157, "step": 7289 }, { "epoch": 1.71, "learning_rate": 8.215286018224274e-06, "loss": 0.0038, "step": 7290 }, { "epoch": 1.71, "learning_rate": 8.21279240446488e-06, "loss": 0.02, "step": 7291 }, { "epoch": 1.71, "learning_rate": 8.210298905497485e-06, "loss": 0.0993, "step": 7292 }, { "epoch": 1.71, "learning_rate": 8.20780552148224e-06, "loss": 0.0105, "step": 7293 }, { "epoch": 1.71, "learning_rate": 8.205312252579301e-06, "loss": 0.0593, "step": 7294 }, { "epoch": 1.71, "learning_rate": 8.202819098948804e-06, "loss": 0.0041, "step": 7295 }, { "epoch": 1.71, "learning_rate": 8.200326060750886e-06, "loss": 0.0155, "step": 7296 }, { "epoch": 1.71, "learning_rate": 8.19783313814568e-06, "loss": 0.058, "step": 7297 }, { "epoch": 1.71, "learning_rate": 8.195340331293297e-06, "loss": 0.0271, "step": 7298 }, { "epoch": 1.71, "learning_rate": 8.192847640353858e-06, "loss": 0.0162, "step": 7299 }, { "epoch": 1.71, "learning_rate": 8.190355065487462e-06, "loss": 0.0185, "step": 7300 }, { "epoch": 1.71, "learning_rate": 8.18786260685421e-06, "loss": 0.0428, "step": 7301 }, { "epoch": 1.71, "learning_rate": 8.185370264614188e-06, "loss": 0.0823, "step": 7302 }, { "epoch": 1.71, "learning_rate": 8.182878038927483e-06, "loss": 0.0241, "step": 7303 }, { "epoch": 1.71, "learning_rate": 8.180385929954167e-06, "loss": 0.0797, "step": 7304 }, { "epoch": 1.71, "learning_rate": 8.17789393785431e-06, "loss": 0.0467, "step": 7305 }, { "epoch": 1.71, "learning_rate": 8.17540206278797e-06, "loss": 0.0047, "step": 7306 }, { "epoch": 1.71, "learning_rate": 8.172910304915203e-06, "loss": 0.0219, "step": 7307 }, { "epoch": 1.72, "learning_rate": 8.170418664396052e-06, "loss": 0.0734, "step": 7308 }, { "epoch": 1.72, "learning_rate": 8.167927141390552e-06, "loss": 0.0055, "step": 7309 }, { "epoch": 1.72, "learning_rate": 8.165435736058738e-06, "loss": 0.0247, "step": 7310 }, { "epoch": 1.72, "learning_rate": 8.16294444856063e-06, "loss": 0.0038, "step": 7311 }, { "epoch": 1.72, "learning_rate": 8.16045327905624e-06, "loss": 0.0143, "step": 7312 }, { "epoch": 1.72, "learning_rate": 8.157962227705583e-06, "loss": 0.0502, "step": 7313 }, { "epoch": 1.72, "learning_rate": 8.155471294668648e-06, "loss": 0.0147, "step": 7314 }, { "epoch": 1.72, "learning_rate": 8.15298048010544e-06, "loss": 0.0538, "step": 7315 }, { "epoch": 1.72, "learning_rate": 8.150489784175932e-06, "loss": 0.0093, "step": 7316 }, { "epoch": 1.72, "learning_rate": 8.147999207040109e-06, "loss": 0.0171, "step": 7317 }, { "epoch": 1.72, "learning_rate": 8.145508748857936e-06, "loss": 0.0185, "step": 7318 }, { "epoch": 1.72, "learning_rate": 8.143018409789375e-06, "loss": 0.0348, "step": 7319 }, { "epoch": 1.72, "learning_rate": 8.140528189994382e-06, "loss": 0.0043, "step": 7320 }, { "epoch": 1.72, "learning_rate": 8.1380380896329e-06, "loss": 0.0143, "step": 7321 }, { "epoch": 1.72, "learning_rate": 8.13554810886487e-06, "loss": 0.0136, "step": 7322 }, { "epoch": 1.72, "learning_rate": 8.133058247850225e-06, "loss": 0.019, "step": 7323 }, { "epoch": 1.72, "learning_rate": 8.130568506748884e-06, "loss": 0.0518, "step": 7324 }, { "epoch": 1.72, "learning_rate": 8.128078885720765e-06, "loss": 0.0497, "step": 7325 }, { "epoch": 1.72, "learning_rate": 8.125589384925777e-06, "loss": 0.0019, "step": 7326 }, { "epoch": 1.72, "learning_rate": 8.123100004523813e-06, "loss": 0.0451, "step": 7327 }, { "epoch": 1.72, "learning_rate": 8.120610744674778e-06, "loss": 0.034, "step": 7328 }, { "epoch": 1.72, "learning_rate": 8.118121605538545e-06, "loss": 0.0015, "step": 7329 }, { "epoch": 1.72, "learning_rate": 8.115632587274993e-06, "loss": 0.0416, "step": 7330 }, { "epoch": 1.72, "learning_rate": 8.113143690044001e-06, "loss": 0.0461, "step": 7331 }, { "epoch": 1.72, "learning_rate": 8.110654914005416e-06, "loss": 0.0273, "step": 7332 }, { "epoch": 1.72, "learning_rate": 8.108166259319103e-06, "loss": 0.0011, "step": 7333 }, { "epoch": 1.72, "learning_rate": 8.105677726144903e-06, "loss": 0.0048, "step": 7334 }, { "epoch": 1.72, "learning_rate": 8.103189314642654e-06, "loss": 0.0137, "step": 7335 }, { "epoch": 1.72, "learning_rate": 8.100701024972186e-06, "loss": 0.0132, "step": 7336 }, { "epoch": 1.72, "learning_rate": 8.098212857293324e-06, "loss": 0.0025, "step": 7337 }, { "epoch": 1.72, "learning_rate": 8.09572481176588e-06, "loss": 0.0076, "step": 7338 }, { "epoch": 1.72, "learning_rate": 8.093236888549658e-06, "loss": 0.0134, "step": 7339 }, { "epoch": 1.72, "learning_rate": 8.090749087804465e-06, "loss": 0.0062, "step": 7340 }, { "epoch": 1.72, "learning_rate": 8.088261409690084e-06, "loss": 0.0021, "step": 7341 }, { "epoch": 1.72, "learning_rate": 8.085773854366304e-06, "loss": 0.0234, "step": 7342 }, { "epoch": 1.72, "learning_rate": 8.083286421992893e-06, "loss": 0.0891, "step": 7343 }, { "epoch": 1.72, "learning_rate": 8.080799112729626e-06, "loss": 0.0602, "step": 7344 }, { "epoch": 1.72, "learning_rate": 8.078311926736258e-06, "loss": 0.0066, "step": 7345 }, { "epoch": 1.72, "learning_rate": 8.07582486417254e-06, "loss": 0.0064, "step": 7346 }, { "epoch": 1.72, "learning_rate": 8.073337925198219e-06, "loss": 0.0286, "step": 7347 }, { "epoch": 1.72, "learning_rate": 8.070851109973025e-06, "loss": 0.0095, "step": 7348 }, { "epoch": 1.72, "learning_rate": 8.068364418656695e-06, "loss": 0.0375, "step": 7349 }, { "epoch": 1.72, "learning_rate": 8.065877851408937e-06, "loss": 0.0015, "step": 7350 }, { "epoch": 1.73, "learning_rate": 8.063391408389473e-06, "loss": 0.0206, "step": 7351 }, { "epoch": 1.73, "learning_rate": 8.060905089758e-06, "loss": 0.0531, "step": 7352 }, { "epoch": 1.73, "learning_rate": 8.05841889567422e-06, "loss": 0.0439, "step": 7353 }, { "epoch": 1.73, "learning_rate": 8.055932826297814e-06, "loss": 0.0101, "step": 7354 }, { "epoch": 1.73, "learning_rate": 8.053446881788465e-06, "loss": 0.0133, "step": 7355 }, { "epoch": 1.73, "learning_rate": 8.050961062305846e-06, "loss": 0.0532, "step": 7356 }, { "epoch": 1.73, "learning_rate": 8.048475368009616e-06, "loss": 0.0142, "step": 7357 }, { "epoch": 1.73, "learning_rate": 8.045989799059438e-06, "loss": 0.0385, "step": 7358 }, { "epoch": 1.73, "learning_rate": 8.043504355614951e-06, "loss": 0.0089, "step": 7359 }, { "epoch": 1.73, "learning_rate": 8.041019037835803e-06, "loss": 0.0388, "step": 7360 }, { "epoch": 1.73, "learning_rate": 8.038533845881619e-06, "loss": 0.0523, "step": 7361 }, { "epoch": 1.73, "learning_rate": 8.036048779912027e-06, "loss": 0.0148, "step": 7362 }, { "epoch": 1.73, "learning_rate": 8.033563840086638e-06, "loss": 0.0028, "step": 7363 }, { "epoch": 1.73, "learning_rate": 8.03107902656506e-06, "loss": 0.0621, "step": 7364 }, { "epoch": 1.73, "learning_rate": 8.028594339506899e-06, "loss": 0.0193, "step": 7365 }, { "epoch": 1.73, "learning_rate": 8.026109779071734e-06, "loss": 0.02, "step": 7366 }, { "epoch": 1.73, "learning_rate": 8.02362534541916e-06, "loss": 0.0002, "step": 7367 }, { "epoch": 1.73, "learning_rate": 8.02114103870874e-06, "loss": 0.0354, "step": 7368 }, { "epoch": 1.73, "learning_rate": 8.018656859100051e-06, "loss": 0.0249, "step": 7369 }, { "epoch": 1.73, "learning_rate": 8.016172806752646e-06, "loss": 0.0148, "step": 7370 }, { "epoch": 1.73, "learning_rate": 8.013688881826075e-06, "loss": 0.0056, "step": 7371 }, { "epoch": 1.73, "learning_rate": 8.011205084479882e-06, "loss": 0.0405, "step": 7372 }, { "epoch": 1.73, "learning_rate": 8.0087214148736e-06, "loss": 0.0113, "step": 7373 }, { "epoch": 1.73, "learning_rate": 8.006237873166756e-06, "loss": 0.1156, "step": 7374 }, { "epoch": 1.73, "learning_rate": 8.003754459518862e-06, "loss": 0.0137, "step": 7375 }, { "epoch": 1.73, "learning_rate": 8.001271174089436e-06, "loss": 0.0021, "step": 7376 }, { "epoch": 1.73, "learning_rate": 7.998788017037969e-06, "loss": 0.0265, "step": 7377 }, { "epoch": 1.73, "learning_rate": 7.996304988523963e-06, "loss": 0.0276, "step": 7378 }, { "epoch": 1.73, "learning_rate": 7.993822088706896e-06, "loss": 0.0053, "step": 7379 }, { "epoch": 1.73, "learning_rate": 7.991339317746247e-06, "loss": 0.0286, "step": 7380 }, { "epoch": 1.73, "learning_rate": 7.988856675801482e-06, "loss": 0.0056, "step": 7381 }, { "epoch": 1.73, "learning_rate": 7.986374163032061e-06, "loss": 0.0262, "step": 7382 }, { "epoch": 1.73, "learning_rate": 7.983891779597441e-06, "loss": 0.0107, "step": 7383 }, { "epoch": 1.73, "learning_rate": 7.981409525657054e-06, "loss": 0.0088, "step": 7384 }, { "epoch": 1.73, "learning_rate": 7.978927401370346e-06, "loss": 0.0271, "step": 7385 }, { "epoch": 1.73, "learning_rate": 7.976445406896734e-06, "loss": 0.0405, "step": 7386 }, { "epoch": 1.73, "learning_rate": 7.973963542395646e-06, "loss": 0.0211, "step": 7387 }, { "epoch": 1.73, "learning_rate": 7.971481808026478e-06, "loss": 0.0588, "step": 7388 }, { "epoch": 1.73, "learning_rate": 7.969000203948643e-06, "loss": 0.0036, "step": 7389 }, { "epoch": 1.73, "learning_rate": 7.966518730321533e-06, "loss": 0.0318, "step": 7390 }, { "epoch": 1.73, "learning_rate": 7.964037387304526e-06, "loss": 0.0038, "step": 7391 }, { "epoch": 1.73, "learning_rate": 7.961556175057005e-06, "loss": 0.0022, "step": 7392 }, { "epoch": 1.74, "learning_rate": 7.959075093738332e-06, "loss": 0.0739, "step": 7393 }, { "epoch": 1.74, "learning_rate": 7.956594143507872e-06, "loss": 0.0189, "step": 7394 }, { "epoch": 1.74, "learning_rate": 7.954113324524971e-06, "loss": 0.035, "step": 7395 }, { "epoch": 1.74, "learning_rate": 7.951632636948974e-06, "loss": 0.0449, "step": 7396 }, { "epoch": 1.74, "learning_rate": 7.949152080939213e-06, "loss": 0.0022, "step": 7397 }, { "epoch": 1.74, "learning_rate": 7.946671656655013e-06, "loss": 0.0188, "step": 7398 }, { "epoch": 1.74, "learning_rate": 7.944191364255702e-06, "loss": 0.0103, "step": 7399 }, { "epoch": 1.74, "learning_rate": 7.941711203900572e-06, "loss": 0.0073, "step": 7400 }, { "epoch": 1.74, "learning_rate": 7.939231175748938e-06, "loss": 0.0474, "step": 7401 }, { "epoch": 1.74, "learning_rate": 7.936751279960079e-06, "loss": 0.0594, "step": 7402 }, { "epoch": 1.74, "learning_rate": 7.93427151669329e-06, "loss": 0.0481, "step": 7403 }, { "epoch": 1.74, "learning_rate": 7.931791886107835e-06, "loss": 0.0241, "step": 7404 }, { "epoch": 1.74, "learning_rate": 7.92931238836299e-06, "loss": 0.0266, "step": 7405 }, { "epoch": 1.74, "learning_rate": 7.926833023618003e-06, "loss": 0.0407, "step": 7406 }, { "epoch": 1.74, "learning_rate": 7.92435379203213e-06, "loss": 0.0182, "step": 7407 }, { "epoch": 1.74, "learning_rate": 7.921874693764609e-06, "loss": 0.008, "step": 7408 }, { "epoch": 1.74, "learning_rate": 7.919395728974673e-06, "loss": 0.0188, "step": 7409 }, { "epoch": 1.74, "learning_rate": 7.916916897821547e-06, "loss": 0.0167, "step": 7410 }, { "epoch": 1.74, "learning_rate": 7.91443820046444e-06, "loss": 0.0611, "step": 7411 }, { "epoch": 1.74, "learning_rate": 7.911959637062566e-06, "loss": 0.0087, "step": 7412 }, { "epoch": 1.74, "learning_rate": 7.909481207775114e-06, "loss": 0.0262, "step": 7413 }, { "epoch": 1.74, "learning_rate": 7.907002912761284e-06, "loss": 0.0512, "step": 7414 }, { "epoch": 1.74, "learning_rate": 7.904524752180246e-06, "loss": 0.0155, "step": 7415 }, { "epoch": 1.74, "learning_rate": 7.902046726191172e-06, "loss": 0.0272, "step": 7416 }, { "epoch": 1.74, "learning_rate": 7.899568834953238e-06, "loss": 0.0218, "step": 7417 }, { "epoch": 1.74, "learning_rate": 7.89709107862558e-06, "loss": 0.0753, "step": 7418 }, { "epoch": 1.74, "learning_rate": 7.894613457367362e-06, "loss": 0.0774, "step": 7419 }, { "epoch": 1.74, "learning_rate": 7.892135971337706e-06, "loss": 0.0235, "step": 7420 }, { "epoch": 1.74, "learning_rate": 7.889658620695754e-06, "loss": 0.0032, "step": 7421 }, { "epoch": 1.74, "learning_rate": 7.887181405600611e-06, "loss": 0.0392, "step": 7422 }, { "epoch": 1.74, "learning_rate": 7.884704326211402e-06, "loss": 0.0388, "step": 7423 }, { "epoch": 1.74, "learning_rate": 7.882227382687218e-06, "loss": 0.0189, "step": 7424 }, { "epoch": 1.74, "learning_rate": 7.879750575187159e-06, "loss": 0.0264, "step": 7425 }, { "epoch": 1.74, "learning_rate": 7.877273903870311e-06, "loss": 0.0247, "step": 7426 }, { "epoch": 1.74, "learning_rate": 7.874797368895745e-06, "loss": 0.0311, "step": 7427 }, { "epoch": 1.74, "learning_rate": 7.872320970422533e-06, "loss": 0.0186, "step": 7428 }, { "epoch": 1.74, "learning_rate": 7.86984470860973e-06, "loss": 0.0275, "step": 7429 }, { "epoch": 1.74, "learning_rate": 7.867368583616388e-06, "loss": 0.0117, "step": 7430 }, { "epoch": 1.74, "learning_rate": 7.864892595601547e-06, "loss": 0.048, "step": 7431 }, { "epoch": 1.74, "learning_rate": 7.862416744724237e-06, "loss": 0.0678, "step": 7432 }, { "epoch": 1.74, "learning_rate": 7.85994103114349e-06, "loss": 0.0169, "step": 7433 }, { "epoch": 1.74, "learning_rate": 7.857465455018308e-06, "loss": 0.018, "step": 7434 }, { "epoch": 1.74, "learning_rate": 7.854990016507711e-06, "loss": 0.0084, "step": 7435 }, { "epoch": 1.75, "learning_rate": 7.852514715770682e-06, "loss": 0.0265, "step": 7436 }, { "epoch": 1.75, "learning_rate": 7.850039552966221e-06, "loss": 0.0121, "step": 7437 }, { "epoch": 1.75, "learning_rate": 7.847564528253296e-06, "loss": 0.0238, "step": 7438 }, { "epoch": 1.75, "learning_rate": 7.845089641790888e-06, "loss": 0.0049, "step": 7439 }, { "epoch": 1.75, "learning_rate": 7.842614893737948e-06, "loss": 0.0583, "step": 7440 }, { "epoch": 1.75, "learning_rate": 7.840140284253436e-06, "loss": 0.0845, "step": 7441 }, { "epoch": 1.75, "learning_rate": 7.837665813496298e-06, "loss": 0.065, "step": 7442 }, { "epoch": 1.75, "learning_rate": 7.83519148162546e-06, "loss": 0.0045, "step": 7443 }, { "epoch": 1.75, "learning_rate": 7.832717288799855e-06, "loss": 0.0112, "step": 7444 }, { "epoch": 1.75, "learning_rate": 7.830243235178394e-06, "loss": 0.0083, "step": 7445 }, { "epoch": 1.75, "learning_rate": 7.827769320919992e-06, "loss": 0.003, "step": 7446 }, { "epoch": 1.75, "learning_rate": 7.825295546183542e-06, "loss": 0.017, "step": 7447 }, { "epoch": 1.75, "learning_rate": 7.822821911127939e-06, "loss": 0.0012, "step": 7448 }, { "epoch": 1.75, "learning_rate": 7.820348415912059e-06, "loss": 0.0075, "step": 7449 }, { "epoch": 1.75, "learning_rate": 7.817875060694774e-06, "loss": 0.0042, "step": 7450 }, { "epoch": 1.75, "learning_rate": 7.815401845634957e-06, "loss": 0.0585, "step": 7451 }, { "epoch": 1.75, "learning_rate": 7.812928770891447e-06, "loss": 0.0216, "step": 7452 }, { "epoch": 1.75, "learning_rate": 7.810455836623105e-06, "loss": 0.0034, "step": 7453 }, { "epoch": 1.75, "learning_rate": 7.807983042988752e-06, "loss": 0.0469, "step": 7454 }, { "epoch": 1.75, "learning_rate": 7.80551039014723e-06, "loss": 0.0034, "step": 7455 }, { "epoch": 1.75, "learning_rate": 7.80303787825734e-06, "loss": 0.0008, "step": 7456 }, { "epoch": 1.75, "learning_rate": 7.800565507477909e-06, "loss": 0.0338, "step": 7457 }, { "epoch": 1.75, "learning_rate": 7.79809327796772e-06, "loss": 0.0527, "step": 7458 }, { "epoch": 1.75, "learning_rate": 7.795621189885576e-06, "loss": 0.0416, "step": 7459 }, { "epoch": 1.75, "learning_rate": 7.793149243390254e-06, "loss": 0.0516, "step": 7460 }, { "epoch": 1.75, "learning_rate": 7.790677438640527e-06, "loss": 0.0042, "step": 7461 }, { "epoch": 1.75, "learning_rate": 7.78820577579516e-06, "loss": 0.0583, "step": 7462 }, { "epoch": 1.75, "learning_rate": 7.785734255012905e-06, "loss": 0.0025, "step": 7463 }, { "epoch": 1.75, "learning_rate": 7.783262876452509e-06, "loss": 0.0046, "step": 7464 }, { "epoch": 1.75, "learning_rate": 7.780791640272707e-06, "loss": 0.0004, "step": 7465 }, { "epoch": 1.75, "learning_rate": 7.778320546632227e-06, "loss": 0.0019, "step": 7466 }, { "epoch": 1.75, "learning_rate": 7.775849595689786e-06, "loss": 0.0183, "step": 7467 }, { "epoch": 1.75, "learning_rate": 7.77337878760409e-06, "loss": 0.004, "step": 7468 }, { "epoch": 1.75, "learning_rate": 7.770908122533848e-06, "loss": 0.0058, "step": 7469 }, { "epoch": 1.75, "learning_rate": 7.768437600637739e-06, "loss": 0.0654, "step": 7470 }, { "epoch": 1.75, "learning_rate": 7.765967222074456e-06, "loss": 0.0069, "step": 7471 }, { "epoch": 1.75, "learning_rate": 7.763496987002657e-06, "loss": 0.0702, "step": 7472 }, { "epoch": 1.75, "learning_rate": 7.761026895581017e-06, "loss": 0.0052, "step": 7473 }, { "epoch": 1.75, "learning_rate": 7.758556947968178e-06, "loss": 0.0625, "step": 7474 }, { "epoch": 1.75, "learning_rate": 7.756087144322794e-06, "loss": 0.0097, "step": 7475 }, { "epoch": 1.75, "learning_rate": 7.7536174848035e-06, "loss": 0.0003, "step": 7476 }, { "epoch": 1.75, "learning_rate": 7.751147969568914e-06, "loss": 0.0025, "step": 7477 }, { "epoch": 1.76, "learning_rate": 7.74867859877766e-06, "loss": 0.0225, "step": 7478 }, { "epoch": 1.76, "learning_rate": 7.74620937258834e-06, "loss": 0.0035, "step": 7479 }, { "epoch": 1.76, "learning_rate": 7.743740291159555e-06, "loss": 0.0259, "step": 7480 }, { "epoch": 1.76, "learning_rate": 7.741271354649892e-06, "loss": 0.0031, "step": 7481 }, { "epoch": 1.76, "learning_rate": 7.738802563217933e-06, "loss": 0.1047, "step": 7482 }, { "epoch": 1.76, "learning_rate": 7.736333917022242e-06, "loss": 0.0511, "step": 7483 }, { "epoch": 1.76, "learning_rate": 7.733865416221384e-06, "loss": 0.001, "step": 7484 }, { "epoch": 1.76, "learning_rate": 7.731397060973913e-06, "loss": 0.0008, "step": 7485 }, { "epoch": 1.76, "learning_rate": 7.728928851438364e-06, "loss": 0.0072, "step": 7486 }, { "epoch": 1.76, "learning_rate": 7.726460787773279e-06, "loss": 0.0002, "step": 7487 }, { "epoch": 1.76, "learning_rate": 7.72399287013717e-06, "loss": 0.0051, "step": 7488 }, { "epoch": 1.76, "learning_rate": 7.721525098688561e-06, "loss": 0.084, "step": 7489 }, { "epoch": 1.76, "learning_rate": 7.719057473585947e-06, "loss": 0.0476, "step": 7490 }, { "epoch": 1.76, "learning_rate": 7.716589994987836e-06, "loss": 0.0086, "step": 7491 }, { "epoch": 1.76, "learning_rate": 7.714122663052698e-06, "loss": 0.0217, "step": 7492 }, { "epoch": 1.76, "learning_rate": 7.71165547793902e-06, "loss": 0.0151, "step": 7493 }, { "epoch": 1.76, "learning_rate": 7.709188439805266e-06, "loss": 0.0208, "step": 7494 }, { "epoch": 1.76, "learning_rate": 7.706721548809893e-06, "loss": 0.0069, "step": 7495 }, { "epoch": 1.76, "learning_rate": 7.704254805111351e-06, "loss": 0.0859, "step": 7496 }, { "epoch": 1.76, "learning_rate": 7.701788208868076e-06, "loss": 0.0018, "step": 7497 }, { "epoch": 1.76, "learning_rate": 7.699321760238498e-06, "loss": 0.0474, "step": 7498 }, { "epoch": 1.76, "learning_rate": 7.696855459381034e-06, "loss": 0.0372, "step": 7499 }, { "epoch": 1.76, "learning_rate": 7.6943893064541e-06, "loss": 0.0218, "step": 7500 }, { "epoch": 1.76, "learning_rate": 7.691923301616089e-06, "loss": 0.025, "step": 7501 }, { "epoch": 1.76, "learning_rate": 7.689457445025397e-06, "loss": 0.0037, "step": 7502 }, { "epoch": 1.76, "learning_rate": 7.686991736840405e-06, "loss": 0.0089, "step": 7503 }, { "epoch": 1.76, "learning_rate": 7.684526177219481e-06, "loss": 0.1221, "step": 7504 }, { "epoch": 1.76, "learning_rate": 7.682060766320995e-06, "loss": 0.0445, "step": 7505 }, { "epoch": 1.76, "learning_rate": 7.679595504303293e-06, "loss": 0.0363, "step": 7506 }, { "epoch": 1.76, "learning_rate": 7.677130391324724e-06, "loss": 0.0032, "step": 7507 }, { "epoch": 1.76, "learning_rate": 7.674665427543613e-06, "loss": 0.0188, "step": 7508 }, { "epoch": 1.76, "learning_rate": 7.672200613118296e-06, "loss": 0.0004, "step": 7509 }, { "epoch": 1.76, "learning_rate": 7.669735948207075e-06, "loss": 0.0078, "step": 7510 }, { "epoch": 1.76, "learning_rate": 7.667271432968265e-06, "loss": 0.0266, "step": 7511 }, { "epoch": 1.76, "learning_rate": 7.664807067560157e-06, "loss": 0.0271, "step": 7512 }, { "epoch": 1.76, "learning_rate": 7.662342852141036e-06, "loss": 0.0485, "step": 7513 }, { "epoch": 1.76, "learning_rate": 7.659878786869182e-06, "loss": 0.0242, "step": 7514 }, { "epoch": 1.76, "learning_rate": 7.657414871902855e-06, "loss": 0.0307, "step": 7515 }, { "epoch": 1.76, "learning_rate": 7.65495110740032e-06, "loss": 0.0024, "step": 7516 }, { "epoch": 1.76, "learning_rate": 7.652487493519814e-06, "loss": 0.0226, "step": 7517 }, { "epoch": 1.76, "learning_rate": 7.650024030419583e-06, "loss": 0.0006, "step": 7518 }, { "epoch": 1.76, "learning_rate": 7.647560718257854e-06, "loss": 0.03, "step": 7519 }, { "epoch": 1.76, "learning_rate": 7.645097557192839e-06, "loss": 0.0009, "step": 7520 }, { "epoch": 1.77, "learning_rate": 7.642634547382753e-06, "loss": 0.0037, "step": 7521 }, { "epoch": 1.77, "learning_rate": 7.640171688985789e-06, "loss": 0.0114, "step": 7522 }, { "epoch": 1.77, "learning_rate": 7.637708982160144e-06, "loss": 0.0466, "step": 7523 }, { "epoch": 1.77, "learning_rate": 7.635246427063985e-06, "loss": 0.0136, "step": 7524 }, { "epoch": 1.77, "learning_rate": 7.632784023855496e-06, "loss": 0.0043, "step": 7525 }, { "epoch": 1.77, "learning_rate": 7.63032177269282e-06, "loss": 0.0084, "step": 7526 }, { "epoch": 1.77, "learning_rate": 7.627859673734122e-06, "loss": 0.0433, "step": 7527 }, { "epoch": 1.77, "learning_rate": 7.625397727137537e-06, "loss": 0.0063, "step": 7528 }, { "epoch": 1.77, "learning_rate": 7.6229359330611915e-06, "loss": 0.0277, "step": 7529 }, { "epoch": 1.77, "learning_rate": 7.620474291663212e-06, "loss": 0.0026, "step": 7530 }, { "epoch": 1.77, "learning_rate": 7.618012803101703e-06, "loss": 0.0269, "step": 7531 }, { "epoch": 1.77, "learning_rate": 7.6155514675347715e-06, "loss": 0.0587, "step": 7532 }, { "epoch": 1.77, "learning_rate": 7.613090285120504e-06, "loss": 0.0579, "step": 7533 }, { "epoch": 1.77, "learning_rate": 7.610629256016986e-06, "loss": 0.0421, "step": 7534 }, { "epoch": 1.77, "learning_rate": 7.608168380382283e-06, "loss": 0.0025, "step": 7535 }, { "epoch": 1.77, "learning_rate": 7.605707658374462e-06, "loss": 0.0128, "step": 7536 }, { "epoch": 1.77, "learning_rate": 7.603247090151574e-06, "loss": 0.046, "step": 7537 }, { "epoch": 1.77, "learning_rate": 7.600786675871657e-06, "loss": 0.0071, "step": 7538 }, { "epoch": 1.77, "learning_rate": 7.598326415692748e-06, "loss": 0.0295, "step": 7539 }, { "epoch": 1.77, "learning_rate": 7.5958663097728626e-06, "loss": 0.0161, "step": 7540 }, { "epoch": 1.77, "learning_rate": 7.5934063582700235e-06, "loss": 0.0234, "step": 7541 }, { "epoch": 1.77, "learning_rate": 7.590946561342219e-06, "loss": 0.0486, "step": 7542 }, { "epoch": 1.77, "learning_rate": 7.588486919147456e-06, "loss": 0.086, "step": 7543 }, { "epoch": 1.77, "learning_rate": 7.586027431843702e-06, "loss": 0.0282, "step": 7544 }, { "epoch": 1.77, "learning_rate": 7.5835680995889404e-06, "loss": 0.02, "step": 7545 }, { "epoch": 1.77, "learning_rate": 7.581108922541131e-06, "loss": 0.0154, "step": 7546 }, { "epoch": 1.77, "learning_rate": 7.578649900858224e-06, "loss": 0.0315, "step": 7547 }, { "epoch": 1.77, "learning_rate": 7.576191034698165e-06, "loss": 0.0247, "step": 7548 }, { "epoch": 1.77, "learning_rate": 7.5737323242188834e-06, "loss": 0.0063, "step": 7549 }, { "epoch": 1.77, "learning_rate": 7.571273769578305e-06, "loss": 0.0147, "step": 7550 }, { "epoch": 1.77, "learning_rate": 7.568815370934337e-06, "loss": 0.0305, "step": 7551 }, { "epoch": 1.77, "learning_rate": 7.566357128444889e-06, "loss": 0.0035, "step": 7552 }, { "epoch": 1.77, "learning_rate": 7.563899042267846e-06, "loss": 0.0026, "step": 7553 }, { "epoch": 1.77, "learning_rate": 7.561441112561095e-06, "loss": 0.0816, "step": 7554 }, { "epoch": 1.77, "learning_rate": 7.5589833394825095e-06, "loss": 0.0403, "step": 7555 }, { "epoch": 1.77, "learning_rate": 7.556525723189948e-06, "loss": 0.0236, "step": 7556 }, { "epoch": 1.77, "learning_rate": 7.5540682638412655e-06, "loss": 0.003, "step": 7557 }, { "epoch": 1.77, "learning_rate": 7.5516109615943004e-06, "loss": 0.001, "step": 7558 }, { "epoch": 1.77, "learning_rate": 7.549153816606892e-06, "loss": 0.0174, "step": 7559 }, { "epoch": 1.77, "learning_rate": 7.546696829036852e-06, "loss": 0.0006, "step": 7560 }, { "epoch": 1.77, "learning_rate": 7.544239999042e-06, "loss": 0.0137, "step": 7561 }, { "epoch": 1.77, "learning_rate": 7.541783326780138e-06, "loss": 0.0244, "step": 7562 }, { "epoch": 1.77, "learning_rate": 7.539326812409053e-06, "loss": 0.0517, "step": 7563 }, { "epoch": 1.78, "learning_rate": 7.5368704560865305e-06, "loss": 0.1013, "step": 7564 }, { "epoch": 1.78, "learning_rate": 7.534414257970337e-06, "loss": 0.0538, "step": 7565 }, { "epoch": 1.78, "learning_rate": 7.5319582182182405e-06, "loss": 0.033, "step": 7566 }, { "epoch": 1.78, "learning_rate": 7.529502336987984e-06, "loss": 0.0798, "step": 7567 }, { "epoch": 1.78, "learning_rate": 7.5270466144373155e-06, "loss": 0.0042, "step": 7568 }, { "epoch": 1.78, "learning_rate": 7.524591050723959e-06, "loss": 0.0216, "step": 7569 }, { "epoch": 1.78, "learning_rate": 7.522135646005641e-06, "loss": 0.026, "step": 7570 }, { "epoch": 1.78, "learning_rate": 7.519680400440068e-06, "loss": 0.022, "step": 7571 }, { "epoch": 1.78, "learning_rate": 7.51722531418494e-06, "loss": 0.0352, "step": 7572 }, { "epoch": 1.78, "learning_rate": 7.5147703873979495e-06, "loss": 0.0206, "step": 7573 }, { "epoch": 1.78, "learning_rate": 7.512315620236772e-06, "loss": 0.0014, "step": 7574 }, { "epoch": 1.78, "learning_rate": 7.509861012859081e-06, "loss": 0.006, "step": 7575 }, { "epoch": 1.78, "learning_rate": 7.507406565422529e-06, "loss": 0.0372, "step": 7576 }, { "epoch": 1.78, "learning_rate": 7.504952278084775e-06, "loss": 0.0277, "step": 7577 }, { "epoch": 1.78, "learning_rate": 7.502498151003445e-06, "loss": 0.0635, "step": 7578 }, { "epoch": 1.78, "learning_rate": 7.500044184336175e-06, "loss": 0.0208, "step": 7579 }, { "epoch": 1.78, "learning_rate": 7.497590378240585e-06, "loss": 0.0365, "step": 7580 }, { "epoch": 1.78, "learning_rate": 7.4951367328742755e-06, "loss": 0.055, "step": 7581 }, { "epoch": 1.78, "learning_rate": 7.4926832483948474e-06, "loss": 0.002, "step": 7582 }, { "epoch": 1.78, "learning_rate": 7.490229924959886e-06, "loss": 0.0023, "step": 7583 }, { "epoch": 1.78, "learning_rate": 7.487776762726972e-06, "loss": 0.0281, "step": 7584 }, { "epoch": 1.78, "learning_rate": 7.485323761853665e-06, "loss": 0.0322, "step": 7585 }, { "epoch": 1.78, "learning_rate": 7.482870922497527e-06, "loss": 0.0227, "step": 7586 }, { "epoch": 1.78, "learning_rate": 7.480418244816097e-06, "loss": 0.0235, "step": 7587 }, { "epoch": 1.78, "learning_rate": 7.477965728966914e-06, "loss": 0.0058, "step": 7588 }, { "epoch": 1.78, "learning_rate": 7.475513375107505e-06, "loss": 0.0059, "step": 7589 }, { "epoch": 1.78, "learning_rate": 7.473061183395378e-06, "loss": 0.0085, "step": 7590 }, { "epoch": 1.78, "learning_rate": 7.470609153988043e-06, "loss": 0.0098, "step": 7591 }, { "epoch": 1.78, "learning_rate": 7.468157287042988e-06, "loss": 0.0256, "step": 7592 }, { "epoch": 1.78, "learning_rate": 7.465705582717702e-06, "loss": 0.0592, "step": 7593 }, { "epoch": 1.78, "learning_rate": 7.463254041169648e-06, "loss": 0.0106, "step": 7594 }, { "epoch": 1.78, "learning_rate": 7.460802662556296e-06, "loss": 0.076, "step": 7595 }, { "epoch": 1.78, "learning_rate": 7.458351447035097e-06, "loss": 0.0274, "step": 7596 }, { "epoch": 1.78, "learning_rate": 7.45590039476349e-06, "loss": 0.0283, "step": 7597 }, { "epoch": 1.78, "learning_rate": 7.453449505898909e-06, "loss": 0.0165, "step": 7598 }, { "epoch": 1.78, "learning_rate": 7.450998780598767e-06, "loss": 0.0049, "step": 7599 }, { "epoch": 1.78, "learning_rate": 7.448548219020482e-06, "loss": 0.04, "step": 7600 }, { "epoch": 1.78, "learning_rate": 7.446097821321445e-06, "loss": 0.0075, "step": 7601 }, { "epoch": 1.78, "learning_rate": 7.443647587659052e-06, "loss": 0.0039, "step": 7602 }, { "epoch": 1.78, "learning_rate": 7.4411975181906744e-06, "loss": 0.0006, "step": 7603 }, { "epoch": 1.78, "learning_rate": 7.4387476130736845e-06, "loss": 0.0079, "step": 7604 }, { "epoch": 1.78, "learning_rate": 7.436297872465439e-06, "loss": 0.0077, "step": 7605 }, { "epoch": 1.79, "learning_rate": 7.433848296523281e-06, "loss": 0.0152, "step": 7606 }, { "epoch": 1.79, "learning_rate": 7.4313988854045495e-06, "loss": 0.0355, "step": 7607 }, { "epoch": 1.79, "learning_rate": 7.4289496392665675e-06, "loss": 0.0229, "step": 7608 }, { "epoch": 1.79, "learning_rate": 7.4265005582666516e-06, "loss": 0.0293, "step": 7609 }, { "epoch": 1.79, "learning_rate": 7.424051642562102e-06, "loss": 0.0007, "step": 7610 }, { "epoch": 1.79, "learning_rate": 7.421602892310218e-06, "loss": 0.014, "step": 7611 }, { "epoch": 1.79, "learning_rate": 7.419154307668275e-06, "loss": 0.0442, "step": 7612 }, { "epoch": 1.79, "learning_rate": 7.416705888793552e-06, "loss": 0.0238, "step": 7613 }, { "epoch": 1.79, "learning_rate": 7.414257635843307e-06, "loss": 0.022, "step": 7614 }, { "epoch": 1.79, "learning_rate": 7.411809548974792e-06, "loss": 0.0378, "step": 7615 }, { "epoch": 1.79, "learning_rate": 7.409361628345249e-06, "loss": 0.1061, "step": 7616 }, { "epoch": 1.79, "learning_rate": 7.406913874111901e-06, "loss": 0.0073, "step": 7617 }, { "epoch": 1.79, "learning_rate": 7.404466286431975e-06, "loss": 0.0646, "step": 7618 }, { "epoch": 1.79, "learning_rate": 7.402018865462671e-06, "loss": 0.1104, "step": 7619 }, { "epoch": 1.79, "learning_rate": 7.399571611361193e-06, "loss": 0.0008, "step": 7620 }, { "epoch": 1.79, "learning_rate": 7.3971245242847245e-06, "loss": 0.0564, "step": 7621 }, { "epoch": 1.79, "learning_rate": 7.394677604390441e-06, "loss": 0.021, "step": 7622 }, { "epoch": 1.79, "learning_rate": 7.39223085183551e-06, "loss": 0.0356, "step": 7623 }, { "epoch": 1.79, "learning_rate": 7.389784266777083e-06, "loss": 0.001, "step": 7624 }, { "epoch": 1.79, "learning_rate": 7.3873378493723065e-06, "loss": 0.0612, "step": 7625 }, { "epoch": 1.79, "learning_rate": 7.384891599778312e-06, "loss": 0.0089, "step": 7626 }, { "epoch": 1.79, "learning_rate": 7.382445518152224e-06, "loss": 0.0235, "step": 7627 }, { "epoch": 1.79, "learning_rate": 7.379999604651147e-06, "loss": 0.0227, "step": 7628 }, { "epoch": 1.79, "learning_rate": 7.377553859432189e-06, "loss": 0.0079, "step": 7629 }, { "epoch": 1.79, "learning_rate": 7.3751082826524365e-06, "loss": 0.0207, "step": 7630 }, { "epoch": 1.79, "learning_rate": 7.372662874468965e-06, "loss": 0.0222, "step": 7631 }, { "epoch": 1.79, "learning_rate": 7.3702176350388524e-06, "loss": 0.0068, "step": 7632 }, { "epoch": 1.79, "learning_rate": 7.3677725645191465e-06, "loss": 0.016, "step": 7633 }, { "epoch": 1.79, "learning_rate": 7.3653276630669e-06, "loss": 0.0034, "step": 7634 }, { "epoch": 1.79, "learning_rate": 7.362882930839144e-06, "loss": 0.0007, "step": 7635 }, { "epoch": 1.79, "learning_rate": 7.360438367992906e-06, "loss": 0.0189, "step": 7636 }, { "epoch": 1.79, "learning_rate": 7.357993974685197e-06, "loss": 0.0017, "step": 7637 }, { "epoch": 1.79, "learning_rate": 7.355549751073021e-06, "loss": 0.0109, "step": 7638 }, { "epoch": 1.79, "learning_rate": 7.353105697313374e-06, "loss": 0.003, "step": 7639 }, { "epoch": 1.79, "learning_rate": 7.35066181356323e-06, "loss": 0.0154, "step": 7640 }, { "epoch": 1.79, "learning_rate": 7.348218099979565e-06, "loss": 0.0206, "step": 7641 }, { "epoch": 1.79, "learning_rate": 7.345774556719335e-06, "loss": 0.0161, "step": 7642 }, { "epoch": 1.79, "learning_rate": 7.343331183939491e-06, "loss": 0.0006, "step": 7643 }, { "epoch": 1.79, "learning_rate": 7.340887981796967e-06, "loss": 0.0382, "step": 7644 }, { "epoch": 1.79, "learning_rate": 7.338444950448693e-06, "loss": 0.0071, "step": 7645 }, { "epoch": 1.79, "learning_rate": 7.3360020900515795e-06, "loss": 0.055, "step": 7646 }, { "epoch": 1.79, "learning_rate": 7.3335594007625334e-06, "loss": 0.0029, "step": 7647 }, { "epoch": 1.79, "learning_rate": 7.3311168827384535e-06, "loss": 0.0166, "step": 7648 }, { "epoch": 1.8, "learning_rate": 7.328674536136214e-06, "loss": 0.0244, "step": 7649 }, { "epoch": 1.8, "learning_rate": 7.32623236111269e-06, "loss": 0.0011, "step": 7650 }, { "epoch": 1.8, "learning_rate": 7.323790357824743e-06, "loss": 0.0196, "step": 7651 }, { "epoch": 1.8, "learning_rate": 7.321348526429222e-06, "loss": 0.0043, "step": 7652 }, { "epoch": 1.8, "learning_rate": 7.318906867082962e-06, "loss": 0.0807, "step": 7653 }, { "epoch": 1.8, "learning_rate": 7.316465379942797e-06, "loss": 0.0086, "step": 7654 }, { "epoch": 1.8, "learning_rate": 7.314024065165535e-06, "loss": 0.0242, "step": 7655 }, { "epoch": 1.8, "learning_rate": 7.311582922907987e-06, "loss": 0.0408, "step": 7656 }, { "epoch": 1.8, "learning_rate": 7.3091419533269455e-06, "loss": 0.0386, "step": 7657 }, { "epoch": 1.8, "learning_rate": 7.306701156579192e-06, "loss": 0.0567, "step": 7658 }, { "epoch": 1.8, "learning_rate": 7.3042605328215035e-06, "loss": 0.0011, "step": 7659 }, { "epoch": 1.8, "learning_rate": 7.301820082210633e-06, "loss": 0.0024, "step": 7660 }, { "epoch": 1.8, "learning_rate": 7.299379804903337e-06, "loss": 0.0321, "step": 7661 }, { "epoch": 1.8, "learning_rate": 7.296939701056349e-06, "loss": 0.0318, "step": 7662 }, { "epoch": 1.8, "learning_rate": 7.294499770826401e-06, "loss": 0.0056, "step": 7663 }, { "epoch": 1.8, "learning_rate": 7.292060014370203e-06, "loss": 0.1091, "step": 7664 }, { "epoch": 1.8, "learning_rate": 7.2896204318444645e-06, "loss": 0.0754, "step": 7665 }, { "epoch": 1.8, "learning_rate": 7.287181023405885e-06, "loss": 0.042, "step": 7666 }, { "epoch": 1.8, "learning_rate": 7.284741789211134e-06, "loss": 0.0051, "step": 7667 }, { "epoch": 1.8, "learning_rate": 7.282302729416895e-06, "loss": 0.0054, "step": 7668 }, { "epoch": 1.8, "learning_rate": 7.27986384417982e-06, "loss": 0.0408, "step": 7669 }, { "epoch": 1.8, "learning_rate": 7.277425133656565e-06, "loss": 0.0496, "step": 7670 }, { "epoch": 1.8, "learning_rate": 7.274986598003761e-06, "loss": 0.0025, "step": 7671 }, { "epoch": 1.8, "learning_rate": 7.272548237378043e-06, "loss": 0.0869, "step": 7672 }, { "epoch": 1.8, "learning_rate": 7.2701100519360165e-06, "loss": 0.0016, "step": 7673 }, { "epoch": 1.8, "learning_rate": 7.267672041834293e-06, "loss": 0.0218, "step": 7674 }, { "epoch": 1.8, "learning_rate": 7.265234207229466e-06, "loss": 0.0293, "step": 7675 }, { "epoch": 1.8, "learning_rate": 7.2627965482781105e-06, "loss": 0.0067, "step": 7676 }, { "epoch": 1.8, "learning_rate": 7.260359065136803e-06, "loss": 0.0068, "step": 7677 }, { "epoch": 1.8, "learning_rate": 7.257921757962099e-06, "loss": 0.0079, "step": 7678 }, { "epoch": 1.8, "learning_rate": 7.255484626910549e-06, "loss": 0.0107, "step": 7679 }, { "epoch": 1.8, "learning_rate": 7.253047672138685e-06, "loss": 0.0018, "step": 7680 }, { "epoch": 1.8, "learning_rate": 7.250610893803034e-06, "loss": 0.0059, "step": 7681 }, { "epoch": 1.8, "learning_rate": 7.248174292060117e-06, "loss": 0.078, "step": 7682 }, { "epoch": 1.8, "learning_rate": 7.2457378670664245e-06, "loss": 0.0017, "step": 7683 }, { "epoch": 1.8, "learning_rate": 7.24330161897846e-06, "loss": 0.0929, "step": 7684 }, { "epoch": 1.8, "learning_rate": 7.240865547952691e-06, "loss": 0.0293, "step": 7685 }, { "epoch": 1.8, "learning_rate": 7.238429654145594e-06, "loss": 0.006, "step": 7686 }, { "epoch": 1.8, "learning_rate": 7.235993937713624e-06, "loss": 0.0253, "step": 7687 }, { "epoch": 1.8, "learning_rate": 7.233558398813226e-06, "loss": 0.003, "step": 7688 }, { "epoch": 1.8, "learning_rate": 7.231123037600833e-06, "loss": 0.0327, "step": 7689 }, { "epoch": 1.8, "learning_rate": 7.22868785423287e-06, "loss": 0.0075, "step": 7690 }, { "epoch": 1.81, "learning_rate": 7.226252848865749e-06, "loss": 0.0068, "step": 7691 }, { "epoch": 1.81, "learning_rate": 7.223818021655866e-06, "loss": 0.0279, "step": 7692 }, { "epoch": 1.81, "learning_rate": 7.2213833727596135e-06, "loss": 0.0148, "step": 7693 }, { "epoch": 1.81, "learning_rate": 7.218948902333367e-06, "loss": 0.0022, "step": 7694 }, { "epoch": 1.81, "learning_rate": 7.216514610533492e-06, "loss": 0.0404, "step": 7695 }, { "epoch": 1.81, "learning_rate": 7.214080497516343e-06, "loss": 0.0029, "step": 7696 }, { "epoch": 1.81, "learning_rate": 7.211646563438262e-06, "loss": 0.0603, "step": 7697 }, { "epoch": 1.81, "learning_rate": 7.209212808455579e-06, "loss": 0.0005, "step": 7698 }, { "epoch": 1.81, "learning_rate": 7.206779232724612e-06, "loss": 0.0071, "step": 7699 }, { "epoch": 1.81, "learning_rate": 7.20434583640168e-06, "loss": 0.097, "step": 7700 }, { "epoch": 1.81, "learning_rate": 7.201912619643063e-06, "loss": 0.0267, "step": 7701 }, { "epoch": 1.81, "learning_rate": 7.199479582605062e-06, "loss": 0.0626, "step": 7702 }, { "epoch": 1.81, "learning_rate": 7.197046725443938e-06, "loss": 0.0166, "step": 7703 }, { "epoch": 1.81, "learning_rate": 7.194614048315962e-06, "loss": 0.0158, "step": 7704 }, { "epoch": 1.81, "learning_rate": 7.192181551377378e-06, "loss": 0.0673, "step": 7705 }, { "epoch": 1.81, "learning_rate": 7.189749234784429e-06, "loss": 0.0682, "step": 7706 }, { "epoch": 1.81, "learning_rate": 7.1873170986933386e-06, "loss": 0.0267, "step": 7707 }, { "epoch": 1.81, "learning_rate": 7.1848851432603225e-06, "loss": 0.0038, "step": 7708 }, { "epoch": 1.81, "learning_rate": 7.182453368641591e-06, "loss": 0.0186, "step": 7709 }, { "epoch": 1.81, "learning_rate": 7.18002177499333e-06, "loss": 0.0463, "step": 7710 }, { "epoch": 1.81, "learning_rate": 7.177590362471724e-06, "loss": 0.0121, "step": 7711 }, { "epoch": 1.81, "learning_rate": 7.175159131232938e-06, "loss": 0.0307, "step": 7712 }, { "epoch": 1.81, "learning_rate": 7.172728081433133e-06, "loss": 0.0194, "step": 7713 }, { "epoch": 1.81, "learning_rate": 7.170297213228455e-06, "loss": 0.0071, "step": 7714 }, { "epoch": 1.81, "learning_rate": 7.1678665267750355e-06, "loss": 0.0605, "step": 7715 }, { "epoch": 1.81, "learning_rate": 7.165436022229e-06, "loss": 0.0004, "step": 7716 }, { "epoch": 1.81, "learning_rate": 7.163005699746452e-06, "loss": 0.0275, "step": 7717 }, { "epoch": 1.81, "learning_rate": 7.1605755594835055e-06, "loss": 0.0111, "step": 7718 }, { "epoch": 1.81, "learning_rate": 7.158145601596232e-06, "loss": 0.0113, "step": 7719 }, { "epoch": 1.81, "learning_rate": 7.155715826240721e-06, "loss": 0.0525, "step": 7720 }, { "epoch": 1.81, "learning_rate": 7.153286233573022e-06, "loss": 0.0082, "step": 7721 }, { "epoch": 1.81, "learning_rate": 7.150856823749202e-06, "loss": 0.0031, "step": 7722 }, { "epoch": 1.81, "learning_rate": 7.148427596925289e-06, "loss": 0.0156, "step": 7723 }, { "epoch": 1.81, "learning_rate": 7.1459985532573184e-06, "loss": 0.0285, "step": 7724 }, { "epoch": 1.81, "learning_rate": 7.1435696929013085e-06, "loss": 0.0044, "step": 7725 }, { "epoch": 1.81, "learning_rate": 7.14114101601326e-06, "loss": 0.05, "step": 7726 }, { "epoch": 1.81, "learning_rate": 7.138712522749171e-06, "loss": 0.0243, "step": 7727 }, { "epoch": 1.81, "learning_rate": 7.136284213265018e-06, "loss": 0.0009, "step": 7728 }, { "epoch": 1.81, "learning_rate": 7.1338560877167754e-06, "loss": 0.0185, "step": 7729 }, { "epoch": 1.81, "learning_rate": 7.131428146260398e-06, "loss": 0.0382, "step": 7730 }, { "epoch": 1.81, "learning_rate": 7.1290003890518345e-06, "loss": 0.01, "step": 7731 }, { "epoch": 1.81, "learning_rate": 7.126572816247015e-06, "loss": 0.054, "step": 7732 }, { "epoch": 1.81, "learning_rate": 7.124145428001865e-06, "loss": 0.0124, "step": 7733 }, { "epoch": 1.82, "learning_rate": 7.1217182244723005e-06, "loss": 0.0011, "step": 7734 }, { "epoch": 1.82, "learning_rate": 7.1192912058142095e-06, "loss": 0.0301, "step": 7735 }, { "epoch": 1.82, "learning_rate": 7.11686437218349e-06, "loss": 0.0418, "step": 7736 }, { "epoch": 1.82, "learning_rate": 7.114437723736005e-06, "loss": 0.0785, "step": 7737 }, { "epoch": 1.82, "learning_rate": 7.112011260627631e-06, "loss": 0.0127, "step": 7738 }, { "epoch": 1.82, "learning_rate": 7.109584983014206e-06, "loss": 0.0425, "step": 7739 }, { "epoch": 1.82, "learning_rate": 7.10715889105158e-06, "loss": 0.0284, "step": 7740 }, { "epoch": 1.82, "learning_rate": 7.104732984895574e-06, "loss": 0.0003, "step": 7741 }, { "epoch": 1.82, "learning_rate": 7.102307264702006e-06, "loss": 0.0075, "step": 7742 }, { "epoch": 1.82, "learning_rate": 7.099881730626681e-06, "loss": 0.0191, "step": 7743 }, { "epoch": 1.82, "learning_rate": 7.097456382825388e-06, "loss": 0.0695, "step": 7744 }, { "epoch": 1.82, "learning_rate": 7.0950312214539075e-06, "loss": 0.0205, "step": 7745 }, { "epoch": 1.82, "learning_rate": 7.092606246668005e-06, "loss": 0.0274, "step": 7746 }, { "epoch": 1.82, "learning_rate": 7.090181458623442e-06, "loss": 0.0027, "step": 7747 }, { "epoch": 1.82, "learning_rate": 7.087756857475956e-06, "loss": 0.0087, "step": 7748 }, { "epoch": 1.82, "learning_rate": 7.085332443381283e-06, "loss": 0.0061, "step": 7749 }, { "epoch": 1.82, "learning_rate": 7.082908216495138e-06, "loss": 0.0853, "step": 7750 }, { "epoch": 1.82, "learning_rate": 7.080484176973229e-06, "loss": 0.0276, "step": 7751 }, { "epoch": 1.82, "learning_rate": 7.078060324971261e-06, "loss": 0.0248, "step": 7752 }, { "epoch": 1.82, "learning_rate": 7.075636660644905e-06, "loss": 0.0547, "step": 7753 }, { "epoch": 1.82, "learning_rate": 7.073213184149843e-06, "loss": 0.0039, "step": 7754 }, { "epoch": 1.82, "learning_rate": 7.070789895641723e-06, "loss": 0.015, "step": 7755 }, { "epoch": 1.82, "learning_rate": 7.068366795276205e-06, "loss": 0.0156, "step": 7756 }, { "epoch": 1.82, "learning_rate": 7.065943883208912e-06, "loss": 0.0148, "step": 7757 }, { "epoch": 1.82, "learning_rate": 7.0635211595954764e-06, "loss": 0.0408, "step": 7758 }, { "epoch": 1.82, "learning_rate": 7.061098624591504e-06, "loss": 0.0104, "step": 7759 }, { "epoch": 1.82, "learning_rate": 7.058676278352598e-06, "loss": 0.042, "step": 7760 }, { "epoch": 1.82, "learning_rate": 7.056254121034344e-06, "loss": 0.0163, "step": 7761 }, { "epoch": 1.82, "learning_rate": 7.053832152792313e-06, "loss": 0.001, "step": 7762 }, { "epoch": 1.82, "learning_rate": 7.0514103737820725e-06, "loss": 0.1034, "step": 7763 }, { "epoch": 1.82, "learning_rate": 7.04898878415917e-06, "loss": 0.0116, "step": 7764 }, { "epoch": 1.82, "learning_rate": 7.046567384079144e-06, "loss": 0.003, "step": 7765 }, { "epoch": 1.82, "learning_rate": 7.04414617369752e-06, "loss": 0.0129, "step": 7766 }, { "epoch": 1.82, "learning_rate": 7.041725153169811e-06, "loss": 0.0428, "step": 7767 }, { "epoch": 1.82, "learning_rate": 7.039304322651527e-06, "loss": 0.0303, "step": 7768 }, { "epoch": 1.82, "learning_rate": 7.036883682298143e-06, "loss": 0.0407, "step": 7769 }, { "epoch": 1.82, "learning_rate": 7.0344632322651516e-06, "loss": 0.03, "step": 7770 }, { "epoch": 1.82, "learning_rate": 7.032042972708005e-06, "loss": 0.0331, "step": 7771 }, { "epoch": 1.82, "learning_rate": 7.029622903782167e-06, "loss": 0.0199, "step": 7772 }, { "epoch": 1.82, "learning_rate": 7.027203025643067e-06, "loss": 0.0414, "step": 7773 }, { "epoch": 1.82, "learning_rate": 7.024783338446144e-06, "loss": 0.0127, "step": 7774 }, { "epoch": 1.82, "learning_rate": 7.022363842346803e-06, "loss": 0.0321, "step": 7775 }, { "epoch": 1.82, "learning_rate": 7.019944537500456e-06, "loss": 0.0393, "step": 7776 }, { "epoch": 1.83, "learning_rate": 7.017525424062495e-06, "loss": 0.0017, "step": 7777 }, { "epoch": 1.83, "learning_rate": 7.015106502188294e-06, "loss": 0.0423, "step": 7778 }, { "epoch": 1.83, "learning_rate": 7.012687772033223e-06, "loss": 0.0201, "step": 7779 }, { "epoch": 1.83, "learning_rate": 7.010269233752635e-06, "loss": 0.04, "step": 7780 }, { "epoch": 1.83, "learning_rate": 7.007850887501874e-06, "loss": 0.0042, "step": 7781 }, { "epoch": 1.83, "learning_rate": 7.0054327334362685e-06, "loss": 0.0213, "step": 7782 }, { "epoch": 1.83, "learning_rate": 7.0030147717111375e-06, "loss": 0.0152, "step": 7783 }, { "epoch": 1.83, "learning_rate": 7.000597002481784e-06, "loss": 0.0038, "step": 7784 }, { "epoch": 1.83, "learning_rate": 6.9981794259035e-06, "loss": 0.077, "step": 7785 }, { "epoch": 1.83, "learning_rate": 6.995762042131573e-06, "loss": 0.0043, "step": 7786 }, { "epoch": 1.83, "learning_rate": 6.99334485132126e-06, "loss": 0.0019, "step": 7787 }, { "epoch": 1.83, "learning_rate": 6.990927853627831e-06, "loss": 0.0126, "step": 7788 }, { "epoch": 1.83, "learning_rate": 6.988511049206513e-06, "loss": 0.0122, "step": 7789 }, { "epoch": 1.83, "learning_rate": 6.986094438212551e-06, "loss": 0.0591, "step": 7790 }, { "epoch": 1.83, "learning_rate": 6.983678020801154e-06, "loss": 0.0118, "step": 7791 }, { "epoch": 1.83, "learning_rate": 6.981261797127537e-06, "loss": 0.0074, "step": 7792 }, { "epoch": 1.83, "learning_rate": 6.97884576734688e-06, "loss": 0.0405, "step": 7793 }, { "epoch": 1.83, "learning_rate": 6.976429931614374e-06, "loss": 0.0367, "step": 7794 }, { "epoch": 1.83, "learning_rate": 6.974014290085191e-06, "loss": 0.0061, "step": 7795 }, { "epoch": 1.83, "learning_rate": 6.971598842914477e-06, "loss": 0.0212, "step": 7796 }, { "epoch": 1.83, "learning_rate": 6.969183590257385e-06, "loss": 0.0162, "step": 7797 }, { "epoch": 1.83, "learning_rate": 6.9667685322690386e-06, "loss": 0.0017, "step": 7798 }, { "epoch": 1.83, "learning_rate": 6.964353669104562e-06, "loss": 0.0187, "step": 7799 }, { "epoch": 1.83, "learning_rate": 6.961939000919058e-06, "loss": 0.0367, "step": 7800 }, { "epoch": 1.83, "learning_rate": 6.959524527867618e-06, "loss": 0.0237, "step": 7801 }, { "epoch": 1.83, "learning_rate": 6.957110250105335e-06, "loss": 0.0035, "step": 7802 }, { "epoch": 1.83, "learning_rate": 6.95469616778726e-06, "loss": 0.064, "step": 7803 }, { "epoch": 1.83, "learning_rate": 6.952282281068467e-06, "loss": 0.0488, "step": 7804 }, { "epoch": 1.83, "learning_rate": 6.949868590103984e-06, "loss": 0.0167, "step": 7805 }, { "epoch": 1.83, "learning_rate": 6.947455095048855e-06, "loss": 0.0494, "step": 7806 }, { "epoch": 1.83, "learning_rate": 6.945041796058085e-06, "loss": 0.02, "step": 7807 }, { "epoch": 1.83, "learning_rate": 6.942628693286695e-06, "loss": 0.0393, "step": 7808 }, { "epoch": 1.83, "learning_rate": 6.940215786889661e-06, "loss": 0.0643, "step": 7809 }, { "epoch": 1.83, "learning_rate": 6.937803077021976e-06, "loss": 0.0251, "step": 7810 }, { "epoch": 1.83, "learning_rate": 6.935390563838606e-06, "loss": 0.0113, "step": 7811 }, { "epoch": 1.83, "learning_rate": 6.932978247494502e-06, "loss": 0.0218, "step": 7812 }, { "epoch": 1.83, "learning_rate": 6.9305661281446115e-06, "loss": 0.0083, "step": 7813 }, { "epoch": 1.83, "learning_rate": 6.92815420594386e-06, "loss": 0.006, "step": 7814 }, { "epoch": 1.83, "learning_rate": 6.925742481047169e-06, "loss": 0.0051, "step": 7815 }, { "epoch": 1.83, "learning_rate": 6.9233309536094396e-06, "loss": 0.057, "step": 7816 }, { "epoch": 1.83, "learning_rate": 6.920919623785566e-06, "loss": 0.0779, "step": 7817 }, { "epoch": 1.83, "learning_rate": 6.918508491730425e-06, "loss": 0.0917, "step": 7818 }, { "epoch": 1.84, "learning_rate": 6.916097557598882e-06, "loss": 0.0197, "step": 7819 }, { "epoch": 1.84, "learning_rate": 6.9136868215458e-06, "loss": 0.0023, "step": 7820 }, { "epoch": 1.84, "learning_rate": 6.911276283726007e-06, "loss": 0.0457, "step": 7821 }, { "epoch": 1.84, "learning_rate": 6.908865944294345e-06, "loss": 0.013, "step": 7822 }, { "epoch": 1.84, "learning_rate": 6.906455803405616e-06, "loss": 0.0158, "step": 7823 }, { "epoch": 1.84, "learning_rate": 6.904045861214637e-06, "loss": 0.0096, "step": 7824 }, { "epoch": 1.84, "learning_rate": 6.9016361178761825e-06, "loss": 0.0107, "step": 7825 }, { "epoch": 1.84, "learning_rate": 6.899226573545046e-06, "loss": 0.0358, "step": 7826 }, { "epoch": 1.84, "learning_rate": 6.896817228375977e-06, "loss": 0.0132, "step": 7827 }, { "epoch": 1.84, "learning_rate": 6.894408082523736e-06, "loss": 0.0221, "step": 7828 }, { "epoch": 1.84, "learning_rate": 6.8919991361430635e-06, "loss": 0.0364, "step": 7829 }, { "epoch": 1.84, "learning_rate": 6.8895903893886795e-06, "loss": 0.002, "step": 7830 }, { "epoch": 1.84, "learning_rate": 6.887181842415303e-06, "loss": 0.011, "step": 7831 }, { "epoch": 1.84, "learning_rate": 6.884773495377628e-06, "loss": 0.0985, "step": 7832 }, { "epoch": 1.84, "learning_rate": 6.88236534843035e-06, "loss": 0.0006, "step": 7833 }, { "epoch": 1.84, "learning_rate": 6.879957401728137e-06, "loss": 0.0032, "step": 7834 }, { "epoch": 1.84, "learning_rate": 6.877549655425656e-06, "loss": 0.0559, "step": 7835 }, { "epoch": 1.84, "learning_rate": 6.875142109677551e-06, "loss": 0.0161, "step": 7836 }, { "epoch": 1.84, "learning_rate": 6.87273476463846e-06, "loss": 0.0008, "step": 7837 }, { "epoch": 1.84, "learning_rate": 6.8703276204630135e-06, "loss": 0.0175, "step": 7838 }, { "epoch": 1.84, "learning_rate": 6.867920677305808e-06, "loss": 0.0225, "step": 7839 }, { "epoch": 1.84, "learning_rate": 6.865513935321456e-06, "loss": 0.008, "step": 7840 }, { "epoch": 1.84, "learning_rate": 6.863107394664528e-06, "loss": 0.0764, "step": 7841 }, { "epoch": 1.84, "learning_rate": 6.860701055489609e-06, "loss": 0.0053, "step": 7842 }, { "epoch": 1.84, "learning_rate": 6.858294917951244e-06, "loss": 0.006, "step": 7843 }, { "epoch": 1.84, "learning_rate": 6.85588898220399e-06, "loss": 0.0467, "step": 7844 }, { "epoch": 1.84, "learning_rate": 6.853483248402377e-06, "loss": 0.0332, "step": 7845 }, { "epoch": 1.84, "learning_rate": 6.85107771670092e-06, "loss": 0.0022, "step": 7846 }, { "epoch": 1.84, "learning_rate": 6.848672387254133e-06, "loss": 0.0097, "step": 7847 }, { "epoch": 1.84, "learning_rate": 6.846267260216503e-06, "loss": 0.0045, "step": 7848 }, { "epoch": 1.84, "learning_rate": 6.843862335742516e-06, "loss": 0.012, "step": 7849 }, { "epoch": 1.84, "learning_rate": 6.841457613986637e-06, "loss": 0.035, "step": 7850 }, { "epoch": 1.84, "learning_rate": 6.839053095103322e-06, "loss": 0.0813, "step": 7851 }, { "epoch": 1.84, "learning_rate": 6.836648779247011e-06, "loss": 0.0029, "step": 7852 }, { "epoch": 1.84, "learning_rate": 6.834244666572134e-06, "loss": 0.0464, "step": 7853 }, { "epoch": 1.84, "learning_rate": 6.8318407572331084e-06, "loss": 0.0006, "step": 7854 }, { "epoch": 1.84, "learning_rate": 6.829437051384334e-06, "loss": 0.0334, "step": 7855 }, { "epoch": 1.84, "learning_rate": 6.827033549180204e-06, "loss": 0.0203, "step": 7856 }, { "epoch": 1.84, "learning_rate": 6.824630250775087e-06, "loss": 0.0106, "step": 7857 }, { "epoch": 1.84, "learning_rate": 6.8222271563233586e-06, "loss": 0.0573, "step": 7858 }, { "epoch": 1.84, "learning_rate": 6.819824265979357e-06, "loss": 0.026, "step": 7859 }, { "epoch": 1.84, "learning_rate": 6.8174215798974295e-06, "loss": 0.0024, "step": 7860 }, { "epoch": 1.84, "learning_rate": 6.815019098231891e-06, "loss": 0.0035, "step": 7861 }, { "epoch": 1.85, "learning_rate": 6.812616821137057e-06, "loss": 0.0211, "step": 7862 }, { "epoch": 1.85, "learning_rate": 6.810214748767229e-06, "loss": 0.006, "step": 7863 }, { "epoch": 1.85, "learning_rate": 6.807812881276685e-06, "loss": 0.0223, "step": 7864 }, { "epoch": 1.85, "learning_rate": 6.805411218819701e-06, "loss": 0.0654, "step": 7865 }, { "epoch": 1.85, "learning_rate": 6.803009761550532e-06, "loss": 0.0039, "step": 7866 }, { "epoch": 1.85, "learning_rate": 6.8006085096234256e-06, "loss": 0.0036, "step": 7867 }, { "epoch": 1.85, "learning_rate": 6.798207463192612e-06, "loss": 0.0067, "step": 7868 }, { "epoch": 1.85, "learning_rate": 6.795806622412312e-06, "loss": 0.0025, "step": 7869 }, { "epoch": 1.85, "learning_rate": 6.793405987436728e-06, "loss": 0.002, "step": 7870 }, { "epoch": 1.85, "learning_rate": 6.791005558420055e-06, "loss": 0.0046, "step": 7871 }, { "epoch": 1.85, "learning_rate": 6.7886053355164725e-06, "loss": 0.0339, "step": 7872 }, { "epoch": 1.85, "learning_rate": 6.786205318880144e-06, "loss": 0.0025, "step": 7873 }, { "epoch": 1.85, "learning_rate": 6.783805508665224e-06, "loss": 0.0009, "step": 7874 }, { "epoch": 1.85, "learning_rate": 6.781405905025849e-06, "loss": 0.081, "step": 7875 }, { "epoch": 1.85, "learning_rate": 6.779006508116151e-06, "loss": 0.0322, "step": 7876 }, { "epoch": 1.85, "learning_rate": 6.7766073180902345e-06, "loss": 0.0296, "step": 7877 }, { "epoch": 1.85, "learning_rate": 6.77420833510221e-06, "loss": 0.006, "step": 7878 }, { "epoch": 1.85, "learning_rate": 6.7718095593061505e-06, "loss": 0.0191, "step": 7879 }, { "epoch": 1.85, "learning_rate": 6.769410990856137e-06, "loss": 0.0002, "step": 7880 }, { "epoch": 1.85, "learning_rate": 6.76701262990623e-06, "loss": 0.0101, "step": 7881 }, { "epoch": 1.85, "learning_rate": 6.764614476610472e-06, "loss": 0.0013, "step": 7882 }, { "epoch": 1.85, "learning_rate": 6.762216531122898e-06, "loss": 0.0421, "step": 7883 }, { "epoch": 1.85, "learning_rate": 6.759818793597527e-06, "loss": 0.0005, "step": 7884 }, { "epoch": 1.85, "learning_rate": 6.7574212641883665e-06, "loss": 0.0498, "step": 7885 }, { "epoch": 1.85, "learning_rate": 6.755023943049405e-06, "loss": 0.0005, "step": 7886 }, { "epoch": 1.85, "learning_rate": 6.752626830334625e-06, "loss": 0.0549, "step": 7887 }, { "epoch": 1.85, "learning_rate": 6.750229926197995e-06, "loss": 0.0011, "step": 7888 }, { "epoch": 1.85, "learning_rate": 6.747833230793464e-06, "loss": 0.0086, "step": 7889 }, { "epoch": 1.85, "learning_rate": 6.745436744274973e-06, "loss": 0.0044, "step": 7890 }, { "epoch": 1.85, "learning_rate": 6.743040466796448e-06, "loss": 0.0389, "step": 7891 }, { "epoch": 1.85, "learning_rate": 6.740644398511801e-06, "loss": 0.0023, "step": 7892 }, { "epoch": 1.85, "learning_rate": 6.738248539574927e-06, "loss": 0.0027, "step": 7893 }, { "epoch": 1.85, "learning_rate": 6.735852890139722e-06, "loss": 0.0132, "step": 7894 }, { "epoch": 1.85, "learning_rate": 6.733457450360044e-06, "loss": 0.005, "step": 7895 }, { "epoch": 1.85, "learning_rate": 6.731062220389763e-06, "loss": 0.0027, "step": 7896 }, { "epoch": 1.85, "learning_rate": 6.728667200382722e-06, "loss": 0.038, "step": 7897 }, { "epoch": 1.85, "learning_rate": 6.726272390492747e-06, "loss": 0.0021, "step": 7898 }, { "epoch": 1.85, "learning_rate": 6.723877790873663e-06, "loss": 0.0115, "step": 7899 }, { "epoch": 1.85, "learning_rate": 6.721483401679271e-06, "loss": 0.0375, "step": 7900 }, { "epoch": 1.85, "learning_rate": 6.719089223063364e-06, "loss": 0.0587, "step": 7901 }, { "epoch": 1.85, "learning_rate": 6.716695255179717e-06, "loss": 0.0771, "step": 7902 }, { "epoch": 1.85, "learning_rate": 6.714301498182097e-06, "loss": 0.0018, "step": 7903 }, { "epoch": 1.86, "learning_rate": 6.711907952224253e-06, "loss": 0.0098, "step": 7904 }, { "epoch": 1.86, "learning_rate": 6.70951461745992e-06, "loss": 0.0352, "step": 7905 }, { "epoch": 1.86, "learning_rate": 6.707121494042827e-06, "loss": 0.0185, "step": 7906 }, { "epoch": 1.86, "learning_rate": 6.704728582126677e-06, "loss": 0.0287, "step": 7907 }, { "epoch": 1.86, "learning_rate": 6.702335881865173e-06, "loss": 0.0111, "step": 7908 }, { "epoch": 1.86, "learning_rate": 6.699943393411992e-06, "loss": 0.0371, "step": 7909 }, { "epoch": 1.86, "learning_rate": 6.697551116920807e-06, "loss": 0.0031, "step": 7910 }, { "epoch": 1.86, "learning_rate": 6.695159052545268e-06, "loss": 0.0435, "step": 7911 }, { "epoch": 1.86, "learning_rate": 6.6927672004390275e-06, "loss": 0.0175, "step": 7912 }, { "epoch": 1.86, "learning_rate": 6.690375560755702e-06, "loss": 0.0101, "step": 7913 }, { "epoch": 1.86, "learning_rate": 6.68798413364891e-06, "loss": 0.0198, "step": 7914 }, { "epoch": 1.86, "learning_rate": 6.685592919272259e-06, "loss": 0.0301, "step": 7915 }, { "epoch": 1.86, "learning_rate": 6.683201917779326e-06, "loss": 0.0206, "step": 7916 }, { "epoch": 1.86, "learning_rate": 6.6808111293236925e-06, "loss": 0.0072, "step": 7917 }, { "epoch": 1.86, "learning_rate": 6.678420554058913e-06, "loss": 0.0555, "step": 7918 }, { "epoch": 1.86, "learning_rate": 6.676030192138536e-06, "loss": 0.0328, "step": 7919 }, { "epoch": 1.86, "learning_rate": 6.673640043716094e-06, "loss": 0.0034, "step": 7920 }, { "epoch": 1.86, "learning_rate": 6.6712501089451065e-06, "loss": 0.0031, "step": 7921 }, { "epoch": 1.86, "learning_rate": 6.668860387979077e-06, "loss": 0.0051, "step": 7922 }, { "epoch": 1.86, "learning_rate": 6.666470880971495e-06, "loss": 0.0022, "step": 7923 }, { "epoch": 1.86, "learning_rate": 6.664081588075843e-06, "loss": 0.0088, "step": 7924 }, { "epoch": 1.86, "learning_rate": 6.661692509445582e-06, "loss": 0.0494, "step": 7925 }, { "epoch": 1.86, "learning_rate": 6.6593036452341634e-06, "loss": 0.0335, "step": 7926 }, { "epoch": 1.86, "learning_rate": 6.65691499559502e-06, "loss": 0.0081, "step": 7927 }, { "epoch": 1.86, "learning_rate": 6.654526560681579e-06, "loss": 0.0072, "step": 7928 }, { "epoch": 1.86, "learning_rate": 6.652138340647243e-06, "loss": 0.0406, "step": 7929 }, { "epoch": 1.86, "learning_rate": 6.649750335645413e-06, "loss": 0.0021, "step": 7930 }, { "epoch": 1.86, "learning_rate": 6.647362545829468e-06, "loss": 0.0047, "step": 7931 }, { "epoch": 1.86, "learning_rate": 6.6449749713527755e-06, "loss": 0.0173, "step": 7932 }, { "epoch": 1.86, "learning_rate": 6.64258761236869e-06, "loss": 0.0367, "step": 7933 }, { "epoch": 1.86, "learning_rate": 6.640200469030547e-06, "loss": 0.0195, "step": 7934 }, { "epoch": 1.86, "learning_rate": 6.637813541491676e-06, "loss": 0.0085, "step": 7935 }, { "epoch": 1.86, "learning_rate": 6.635426829905387e-06, "loss": 0.0521, "step": 7936 }, { "epoch": 1.86, "learning_rate": 6.633040334424981e-06, "loss": 0.0015, "step": 7937 }, { "epoch": 1.86, "learning_rate": 6.630654055203738e-06, "loss": 0.0639, "step": 7938 }, { "epoch": 1.86, "learning_rate": 6.62826799239493e-06, "loss": 0.0287, "step": 7939 }, { "epoch": 1.86, "learning_rate": 6.625882146151814e-06, "loss": 0.0033, "step": 7940 }, { "epoch": 1.86, "learning_rate": 6.623496516627632e-06, "loss": 0.0248, "step": 7941 }, { "epoch": 1.86, "learning_rate": 6.621111103975615e-06, "loss": 0.0439, "step": 7942 }, { "epoch": 1.86, "learning_rate": 6.618725908348971e-06, "loss": 0.0025, "step": 7943 }, { "epoch": 1.86, "learning_rate": 6.616340929900909e-06, "loss": 0.029, "step": 7944 }, { "epoch": 1.86, "learning_rate": 6.613956168784607e-06, "loss": 0.0154, "step": 7945 }, { "epoch": 1.86, "learning_rate": 6.6115716251532454e-06, "loss": 0.0288, "step": 7946 }, { "epoch": 1.87, "learning_rate": 6.609187299159976e-06, "loss": 0.0045, "step": 7947 }, { "epoch": 1.87, "learning_rate": 6.60680319095795e-06, "loss": 0.0008, "step": 7948 }, { "epoch": 1.87, "learning_rate": 6.604419300700298e-06, "loss": 0.036, "step": 7949 }, { "epoch": 1.87, "learning_rate": 6.6020356285401314e-06, "loss": 0.0027, "step": 7950 }, { "epoch": 1.87, "learning_rate": 6.599652174630558e-06, "loss": 0.0046, "step": 7951 }, { "epoch": 1.87, "learning_rate": 6.597268939124662e-06, "loss": 0.0017, "step": 7952 }, { "epoch": 1.87, "learning_rate": 6.594885922175523e-06, "loss": 0.0057, "step": 7953 }, { "epoch": 1.87, "learning_rate": 6.592503123936197e-06, "loss": 0.027, "step": 7954 }, { "epoch": 1.87, "learning_rate": 6.590120544559736e-06, "loss": 0.0207, "step": 7955 }, { "epoch": 1.87, "learning_rate": 6.587738184199167e-06, "loss": 0.002, "step": 7956 }, { "epoch": 1.87, "learning_rate": 6.585356043007511e-06, "loss": 0.0174, "step": 7957 }, { "epoch": 1.87, "learning_rate": 6.5829741211377775e-06, "loss": 0.0027, "step": 7958 }, { "epoch": 1.87, "learning_rate": 6.580592418742948e-06, "loss": 0.0079, "step": 7959 }, { "epoch": 1.87, "learning_rate": 6.578210935976003e-06, "loss": 0.0395, "step": 7960 }, { "epoch": 1.87, "learning_rate": 6.575829672989906e-06, "loss": 0.0192, "step": 7961 }, { "epoch": 1.87, "learning_rate": 6.573448629937604e-06, "loss": 0.039, "step": 7962 }, { "epoch": 1.87, "learning_rate": 6.571067806972029e-06, "loss": 0.0514, "step": 7963 }, { "epoch": 1.87, "learning_rate": 6.568687204246105e-06, "loss": 0.0324, "step": 7964 }, { "epoch": 1.87, "learning_rate": 6.566306821912729e-06, "loss": 0.0066, "step": 7965 }, { "epoch": 1.87, "learning_rate": 6.563926660124802e-06, "loss": 0.0347, "step": 7966 }, { "epoch": 1.87, "learning_rate": 6.561546719035199e-06, "loss": 0.0251, "step": 7967 }, { "epoch": 1.87, "learning_rate": 6.559166998796781e-06, "loss": 0.0369, "step": 7968 }, { "epoch": 1.87, "learning_rate": 6.5567874995623995e-06, "loss": 0.0255, "step": 7969 }, { "epoch": 1.87, "learning_rate": 6.554408221484884e-06, "loss": 0.0148, "step": 7970 }, { "epoch": 1.87, "learning_rate": 6.552029164717064e-06, "loss": 0.0292, "step": 7971 }, { "epoch": 1.87, "learning_rate": 6.549650329411737e-06, "loss": 0.0404, "step": 7972 }, { "epoch": 1.87, "learning_rate": 6.547271715721699e-06, "loss": 0.0074, "step": 7973 }, { "epoch": 1.87, "learning_rate": 6.544893323799729e-06, "loss": 0.0012, "step": 7974 }, { "epoch": 1.87, "learning_rate": 6.542515153798588e-06, "loss": 0.0044, "step": 7975 }, { "epoch": 1.87, "learning_rate": 6.540137205871029e-06, "loss": 0.0593, "step": 7976 }, { "epoch": 1.87, "learning_rate": 6.537759480169783e-06, "loss": 0.0215, "step": 7977 }, { "epoch": 1.87, "learning_rate": 6.535381976847573e-06, "loss": 0.044, "step": 7978 }, { "epoch": 1.87, "learning_rate": 6.5330046960571045e-06, "loss": 0.0378, "step": 7979 }, { "epoch": 1.87, "learning_rate": 6.530627637951072e-06, "loss": 0.0013, "step": 7980 }, { "epoch": 1.87, "learning_rate": 6.528250802682152e-06, "loss": 0.0107, "step": 7981 }, { "epoch": 1.87, "learning_rate": 6.525874190403004e-06, "loss": 0.0594, "step": 7982 }, { "epoch": 1.87, "learning_rate": 6.523497801266286e-06, "loss": 0.0441, "step": 7983 }, { "epoch": 1.87, "learning_rate": 6.521121635424628e-06, "loss": 0.0156, "step": 7984 }, { "epoch": 1.87, "learning_rate": 6.518745693030653e-06, "loss": 0.0076, "step": 7985 }, { "epoch": 1.87, "learning_rate": 6.516369974236961e-06, "loss": 0.0027, "step": 7986 }, { "epoch": 1.87, "learning_rate": 6.513994479196153e-06, "loss": 0.0407, "step": 7987 }, { "epoch": 1.87, "learning_rate": 6.5116192080608e-06, "loss": 0.0058, "step": 7988 }, { "epoch": 1.87, "learning_rate": 6.5092441609834675e-06, "loss": 0.0509, "step": 7989 }, { "epoch": 1.88, "learning_rate": 6.506869338116703e-06, "loss": 0.1181, "step": 7990 }, { "epoch": 1.88, "learning_rate": 6.504494739613043e-06, "loss": 0.0291, "step": 7991 }, { "epoch": 1.88, "learning_rate": 6.502120365625009e-06, "loss": 0.0037, "step": 7992 }, { "epoch": 1.88, "learning_rate": 6.499746216305101e-06, "loss": 0.0107, "step": 7993 }, { "epoch": 1.88, "learning_rate": 6.497372291805815e-06, "loss": 0.0045, "step": 7994 }, { "epoch": 1.88, "learning_rate": 6.494998592279625e-06, "loss": 0.0252, "step": 7995 }, { "epoch": 1.88, "learning_rate": 6.492625117878995e-06, "loss": 0.0132, "step": 7996 }, { "epoch": 1.88, "learning_rate": 6.49025186875637e-06, "loss": 0.0096, "step": 7997 }, { "epoch": 1.88, "learning_rate": 6.4878788450641886e-06, "loss": 0.0147, "step": 7998 }, { "epoch": 1.88, "learning_rate": 6.485506046954863e-06, "loss": 0.003, "step": 7999 }, { "epoch": 1.88, "learning_rate": 6.483133474580799e-06, "loss": 0.0023, "step": 8000 }, { "epoch": 1.88, "learning_rate": 6.480761128094393e-06, "loss": 0.0094, "step": 8001 }, { "epoch": 1.88, "learning_rate": 6.478389007648014e-06, "loss": 0.0216, "step": 8002 }, { "epoch": 1.88, "learning_rate": 6.476017113394026e-06, "loss": 0.0085, "step": 8003 }, { "epoch": 1.88, "learning_rate": 6.473645445484773e-06, "loss": 0.0099, "step": 8004 }, { "epoch": 1.88, "learning_rate": 6.471274004072589e-06, "loss": 0.0437, "step": 8005 }, { "epoch": 1.88, "learning_rate": 6.468902789309788e-06, "loss": 0.0139, "step": 8006 }, { "epoch": 1.88, "learning_rate": 6.466531801348676e-06, "loss": 0.0007, "step": 8007 }, { "epoch": 1.88, "learning_rate": 6.464161040341539e-06, "loss": 0.015, "step": 8008 }, { "epoch": 1.88, "learning_rate": 6.461790506440651e-06, "loss": 0.0078, "step": 8009 }, { "epoch": 1.88, "learning_rate": 6.459420199798273e-06, "loss": 0.0019, "step": 8010 }, { "epoch": 1.88, "learning_rate": 6.4570501205666454e-06, "loss": 0.023, "step": 8011 }, { "epoch": 1.88, "learning_rate": 6.454680268898002e-06, "loss": 0.0009, "step": 8012 }, { "epoch": 1.88, "learning_rate": 6.452310644944553e-06, "loss": 0.0041, "step": 8013 }, { "epoch": 1.88, "learning_rate": 6.449941248858505e-06, "loss": 0.0148, "step": 8014 }, { "epoch": 1.88, "learning_rate": 6.447572080792037e-06, "loss": 0.0033, "step": 8015 }, { "epoch": 1.88, "learning_rate": 6.4452031408973235e-06, "loss": 0.0029, "step": 8016 }, { "epoch": 1.88, "learning_rate": 6.442834429326528e-06, "loss": 0.0039, "step": 8017 }, { "epoch": 1.88, "learning_rate": 6.440465946231777e-06, "loss": 0.0088, "step": 8018 }, { "epoch": 1.88, "learning_rate": 6.438097691765215e-06, "loss": 0.0106, "step": 8019 }, { "epoch": 1.88, "learning_rate": 6.435729666078938e-06, "loss": 0.0157, "step": 8020 }, { "epoch": 1.88, "learning_rate": 6.433361869325058e-06, "loss": 0.0175, "step": 8021 }, { "epoch": 1.88, "learning_rate": 6.430994301655649e-06, "loss": 0.0808, "step": 8022 }, { "epoch": 1.88, "learning_rate": 6.4286269632227835e-06, "loss": 0.0124, "step": 8023 }, { "epoch": 1.88, "learning_rate": 6.426259854178514e-06, "loss": 0.0412, "step": 8024 }, { "epoch": 1.88, "learning_rate": 6.423892974674878e-06, "loss": 0.0055, "step": 8025 }, { "epoch": 1.88, "learning_rate": 6.421526324863904e-06, "loss": 0.0575, "step": 8026 }, { "epoch": 1.88, "learning_rate": 6.419159904897596e-06, "loss": 0.0027, "step": 8027 }, { "epoch": 1.88, "learning_rate": 6.416793714927953e-06, "loss": 0.0162, "step": 8028 }, { "epoch": 1.88, "learning_rate": 6.414427755106953e-06, "loss": 0.0748, "step": 8029 }, { "epoch": 1.88, "learning_rate": 6.4120620255865625e-06, "loss": 0.0006, "step": 8030 }, { "epoch": 1.88, "learning_rate": 6.409696526518729e-06, "loss": 0.0022, "step": 8031 }, { "epoch": 1.89, "learning_rate": 6.407331258055392e-06, "loss": 0.0003, "step": 8032 }, { "epoch": 1.89, "learning_rate": 6.404966220348467e-06, "loss": 0.0239, "step": 8033 }, { "epoch": 1.89, "learning_rate": 6.402601413549862e-06, "loss": 0.0379, "step": 8034 }, { "epoch": 1.89, "learning_rate": 6.400236837811474e-06, "loss": 0.0312, "step": 8035 }, { "epoch": 1.89, "learning_rate": 6.397872493285169e-06, "loss": 0.0034, "step": 8036 }, { "epoch": 1.89, "learning_rate": 6.39550838012282e-06, "loss": 0.0326, "step": 8037 }, { "epoch": 1.89, "learning_rate": 6.393144498476261e-06, "loss": 0.0027, "step": 8038 }, { "epoch": 1.89, "learning_rate": 6.3907808484973336e-06, "loss": 0.04, "step": 8039 }, { "epoch": 1.89, "learning_rate": 6.38841743033785e-06, "loss": 0.0026, "step": 8040 }, { "epoch": 1.89, "learning_rate": 6.386054244149613e-06, "loss": 0.0265, "step": 8041 }, { "epoch": 1.89, "learning_rate": 6.383691290084409e-06, "loss": 0.0307, "step": 8042 }, { "epoch": 1.89, "learning_rate": 6.381328568294011e-06, "loss": 0.0304, "step": 8043 }, { "epoch": 1.89, "learning_rate": 6.378966078930176e-06, "loss": 0.0564, "step": 8044 }, { "epoch": 1.89, "learning_rate": 6.376603822144646e-06, "loss": 0.0101, "step": 8045 }, { "epoch": 1.89, "learning_rate": 6.3742417980891495e-06, "loss": 0.0004, "step": 8046 }, { "epoch": 1.89, "learning_rate": 6.371880006915395e-06, "loss": 0.0031, "step": 8047 }, { "epoch": 1.89, "learning_rate": 6.369518448775085e-06, "loss": 0.0973, "step": 8048 }, { "epoch": 1.89, "learning_rate": 6.3671571238198985e-06, "loss": 0.0007, "step": 8049 }, { "epoch": 1.89, "learning_rate": 6.3647960322015e-06, "loss": 0.0451, "step": 8050 }, { "epoch": 1.89, "learning_rate": 6.362435174071553e-06, "loss": 0.0435, "step": 8051 }, { "epoch": 1.89, "learning_rate": 6.360074549581681e-06, "loss": 0.0482, "step": 8052 }, { "epoch": 1.89, "learning_rate": 6.35771415888352e-06, "loss": 0.0117, "step": 8053 }, { "epoch": 1.89, "learning_rate": 6.355354002128664e-06, "loss": 0.0044, "step": 8054 }, { "epoch": 1.89, "learning_rate": 6.3529940794687195e-06, "loss": 0.0232, "step": 8055 }, { "epoch": 1.89, "learning_rate": 6.35063439105525e-06, "loss": 0.0212, "step": 8056 }, { "epoch": 1.89, "learning_rate": 6.348274937039827e-06, "loss": 0.001, "step": 8057 }, { "epoch": 1.89, "learning_rate": 6.345915717573995e-06, "loss": 0.0034, "step": 8058 }, { "epoch": 1.89, "learning_rate": 6.343556732809285e-06, "loss": 0.0029, "step": 8059 }, { "epoch": 1.89, "learning_rate": 6.341197982897218e-06, "loss": 0.0071, "step": 8060 }, { "epoch": 1.89, "learning_rate": 6.338839467989292e-06, "loss": 0.0008, "step": 8061 }, { "epoch": 1.89, "learning_rate": 6.336481188236996e-06, "loss": 0.0875, "step": 8062 }, { "epoch": 1.89, "learning_rate": 6.3341231437918e-06, "loss": 0.0131, "step": 8063 }, { "epoch": 1.89, "learning_rate": 6.331765334805164e-06, "loss": 0.0003, "step": 8064 }, { "epoch": 1.89, "learning_rate": 6.329407761428525e-06, "loss": 0.0216, "step": 8065 }, { "epoch": 1.89, "learning_rate": 6.327050423813317e-06, "loss": 0.0064, "step": 8066 }, { "epoch": 1.89, "learning_rate": 6.324693322110942e-06, "loss": 0.0039, "step": 8067 }, { "epoch": 1.89, "learning_rate": 6.322336456472798e-06, "loss": 0.0003, "step": 8068 }, { "epoch": 1.89, "learning_rate": 6.319979827050275e-06, "loss": 0.0041, "step": 8069 }, { "epoch": 1.89, "learning_rate": 6.3176234339947285e-06, "loss": 0.0491, "step": 8070 }, { "epoch": 1.89, "learning_rate": 6.315267277457517e-06, "loss": 0.0021, "step": 8071 }, { "epoch": 1.89, "learning_rate": 6.312911357589967e-06, "loss": 0.0422, "step": 8072 }, { "epoch": 1.89, "learning_rate": 6.310555674543411e-06, "loss": 0.0362, "step": 8073 }, { "epoch": 1.89, "learning_rate": 6.308200228469139e-06, "loss": 0.0014, "step": 8074 }, { "epoch": 1.9, "learning_rate": 6.305845019518454e-06, "loss": 0.0013, "step": 8075 }, { "epoch": 1.9, "learning_rate": 6.303490047842623e-06, "loss": 0.0046, "step": 8076 }, { "epoch": 1.9, "learning_rate": 6.301135313592908e-06, "loss": 0.0002, "step": 8077 }, { "epoch": 1.9, "learning_rate": 6.298780816920556e-06, "loss": 0.0061, "step": 8078 }, { "epoch": 1.9, "learning_rate": 6.296426557976789e-06, "loss": 0.0321, "step": 8079 }, { "epoch": 1.9, "learning_rate": 6.294072536912828e-06, "loss": 0.0339, "step": 8080 }, { "epoch": 1.9, "learning_rate": 6.291718753879863e-06, "loss": 0.0076, "step": 8081 }, { "epoch": 1.9, "learning_rate": 6.289365209029086e-06, "loss": 0.04, "step": 8082 }, { "epoch": 1.9, "learning_rate": 6.287011902511658e-06, "loss": 0.0134, "step": 8083 }, { "epoch": 1.9, "learning_rate": 6.284658834478736e-06, "loss": 0.0093, "step": 8084 }, { "epoch": 1.9, "learning_rate": 6.282306005081453e-06, "loss": 0.0074, "step": 8085 }, { "epoch": 1.9, "learning_rate": 6.279953414470931e-06, "loss": 0.0028, "step": 8086 }, { "epoch": 1.9, "learning_rate": 6.277601062798284e-06, "loss": 0.0148, "step": 8087 }, { "epoch": 1.9, "learning_rate": 6.275248950214592e-06, "loss": 0.0372, "step": 8088 }, { "epoch": 1.9, "learning_rate": 6.272897076870941e-06, "loss": 0.0193, "step": 8089 }, { "epoch": 1.9, "learning_rate": 6.270545442918381e-06, "loss": 0.0061, "step": 8090 }, { "epoch": 1.9, "learning_rate": 6.268194048507967e-06, "loss": 0.0651, "step": 8091 }, { "epoch": 1.9, "learning_rate": 6.265842893790719e-06, "loss": 0.0405, "step": 8092 }, { "epoch": 1.9, "learning_rate": 6.263491978917658e-06, "loss": 0.0032, "step": 8093 }, { "epoch": 1.9, "learning_rate": 6.261141304039784e-06, "loss": 0.0024, "step": 8094 }, { "epoch": 1.9, "learning_rate": 6.258790869308074e-06, "loss": 0.0082, "step": 8095 }, { "epoch": 1.9, "learning_rate": 6.256440674873503e-06, "loss": 0.0291, "step": 8096 }, { "epoch": 1.9, "learning_rate": 6.254090720887017e-06, "loss": 0.0315, "step": 8097 }, { "epoch": 1.9, "learning_rate": 6.251741007499559e-06, "loss": 0.012, "step": 8098 }, { "epoch": 1.9, "learning_rate": 6.249391534862044e-06, "loss": 0.0279, "step": 8099 }, { "epoch": 1.9, "learning_rate": 6.247042303125383e-06, "loss": 0.0011, "step": 8100 }, { "epoch": 1.9, "learning_rate": 6.2446933124404664e-06, "loss": 0.0074, "step": 8101 }, { "epoch": 1.9, "learning_rate": 6.2423445629581645e-06, "loss": 0.0137, "step": 8102 }, { "epoch": 1.9, "learning_rate": 6.239996054829347e-06, "loss": 0.0196, "step": 8103 }, { "epoch": 1.9, "learning_rate": 6.237647788204848e-06, "loss": 0.073, "step": 8104 }, { "epoch": 1.9, "learning_rate": 6.235299763235505e-06, "loss": 0.0869, "step": 8105 }, { "epoch": 1.9, "learning_rate": 6.232951980072119e-06, "loss": 0.0031, "step": 8106 }, { "epoch": 1.9, "learning_rate": 6.230604438865504e-06, "loss": 0.0016, "step": 8107 }, { "epoch": 1.9, "learning_rate": 6.228257139766427e-06, "loss": 0.0227, "step": 8108 }, { "epoch": 1.9, "learning_rate": 6.2259100829256655e-06, "loss": 0.0557, "step": 8109 }, { "epoch": 1.9, "learning_rate": 6.223563268493959e-06, "loss": 0.0253, "step": 8110 }, { "epoch": 1.9, "learning_rate": 6.2212166966220545e-06, "loss": 0.0326, "step": 8111 }, { "epoch": 1.9, "learning_rate": 6.218870367460668e-06, "loss": 0.025, "step": 8112 }, { "epoch": 1.9, "learning_rate": 6.216524281160501e-06, "loss": 0.0003, "step": 8113 }, { "epoch": 1.9, "learning_rate": 6.214178437872247e-06, "loss": 0.0432, "step": 8114 }, { "epoch": 1.9, "learning_rate": 6.211832837746572e-06, "loss": 0.0042, "step": 8115 }, { "epoch": 1.9, "learning_rate": 6.209487480934141e-06, "loss": 0.0098, "step": 8116 }, { "epoch": 1.91, "learning_rate": 6.207142367585591e-06, "loss": 0.0072, "step": 8117 }, { "epoch": 1.91, "learning_rate": 6.20479749785155e-06, "loss": 0.054, "step": 8118 }, { "epoch": 1.91, "learning_rate": 6.202452871882627e-06, "loss": 0.0542, "step": 8119 }, { "epoch": 1.91, "learning_rate": 6.200108489829416e-06, "loss": 0.0135, "step": 8120 }, { "epoch": 1.91, "learning_rate": 6.197764351842505e-06, "loss": 0.0264, "step": 8121 }, { "epoch": 1.91, "learning_rate": 6.195420458072443e-06, "loss": 0.0116, "step": 8122 }, { "epoch": 1.91, "learning_rate": 6.193076808669794e-06, "loss": 0.076, "step": 8123 }, { "epoch": 1.91, "learning_rate": 6.190733403785073e-06, "loss": 0.0357, "step": 8124 }, { "epoch": 1.91, "learning_rate": 6.1883902435688125e-06, "loss": 0.0347, "step": 8125 }, { "epoch": 1.91, "learning_rate": 6.186047328171501e-06, "loss": 0.0455, "step": 8126 }, { "epoch": 1.91, "learning_rate": 6.183704657743632e-06, "loss": 0.0369, "step": 8127 }, { "epoch": 1.91, "learning_rate": 6.1813622324356655e-06, "loss": 0.0557, "step": 8128 }, { "epoch": 1.91, "learning_rate": 6.179020052398064e-06, "loss": 0.0454, "step": 8129 }, { "epoch": 1.91, "learning_rate": 6.176678117781263e-06, "loss": 0.0284, "step": 8130 }, { "epoch": 1.91, "learning_rate": 6.174336428735682e-06, "loss": 0.0114, "step": 8131 }, { "epoch": 1.91, "learning_rate": 6.171994985411731e-06, "loss": 0.0329, "step": 8132 }, { "epoch": 1.91, "learning_rate": 6.169653787959795e-06, "loss": 0.0067, "step": 8133 }, { "epoch": 1.91, "learning_rate": 6.167312836530255e-06, "loss": 0.0067, "step": 8134 }, { "epoch": 1.91, "learning_rate": 6.164972131273463e-06, "loss": 0.0206, "step": 8135 }, { "epoch": 1.91, "learning_rate": 6.162631672339766e-06, "loss": 0.0093, "step": 8136 }, { "epoch": 1.91, "learning_rate": 6.160291459879495e-06, "loss": 0.0344, "step": 8137 }, { "epoch": 1.91, "learning_rate": 6.157951494042953e-06, "loss": 0.0149, "step": 8138 }, { "epoch": 1.91, "learning_rate": 6.155611774980445e-06, "loss": 0.0158, "step": 8139 }, { "epoch": 1.91, "learning_rate": 6.153272302842239e-06, "loss": 0.0053, "step": 8140 }, { "epoch": 1.91, "learning_rate": 6.150933077778612e-06, "loss": 0.0098, "step": 8141 }, { "epoch": 1.91, "learning_rate": 6.148594099939799e-06, "loss": 0.0011, "step": 8142 }, { "epoch": 1.91, "learning_rate": 6.146255369476044e-06, "loss": 0.0007, "step": 8143 }, { "epoch": 1.91, "learning_rate": 6.143916886537553e-06, "loss": 0.0545, "step": 8144 }, { "epoch": 1.91, "learning_rate": 6.141578651274531e-06, "loss": 0.0468, "step": 8145 }, { "epoch": 1.91, "learning_rate": 6.139240663837167e-06, "loss": 0.018, "step": 8146 }, { "epoch": 1.91, "learning_rate": 6.136902924375621e-06, "loss": 0.0482, "step": 8147 }, { "epoch": 1.91, "learning_rate": 6.134565433040052e-06, "loss": 0.0218, "step": 8148 }, { "epoch": 1.91, "learning_rate": 6.132228189980591e-06, "loss": 0.0134, "step": 8149 }, { "epoch": 1.91, "learning_rate": 6.129891195347365e-06, "loss": 0.0286, "step": 8150 }, { "epoch": 1.91, "learning_rate": 6.127554449290473e-06, "loss": 0.0202, "step": 8151 }, { "epoch": 1.91, "learning_rate": 6.1252179519600075e-06, "loss": 0.0378, "step": 8152 }, { "epoch": 1.91, "learning_rate": 6.122881703506038e-06, "loss": 0.0047, "step": 8153 }, { "epoch": 1.91, "learning_rate": 6.120545704078622e-06, "loss": 0.0226, "step": 8154 }, { "epoch": 1.91, "learning_rate": 6.118209953827807e-06, "loss": 0.0011, "step": 8155 }, { "epoch": 1.91, "learning_rate": 6.1158744529036065e-06, "loss": 0.0504, "step": 8156 }, { "epoch": 1.91, "learning_rate": 6.11353920145604e-06, "loss": 0.0075, "step": 8157 }, { "epoch": 1.91, "learning_rate": 6.11120419963509e-06, "loss": 0.0073, "step": 8158 }, { "epoch": 1.91, "learning_rate": 6.108869447590745e-06, "loss": 0.0051, "step": 8159 }, { "epoch": 1.92, "learning_rate": 6.106534945472953e-06, "loss": 0.0288, "step": 8160 }, { "epoch": 1.92, "learning_rate": 6.10420069343167e-06, "loss": 0.025, "step": 8161 }, { "epoch": 1.92, "learning_rate": 6.101866691616813e-06, "loss": 0.0113, "step": 8162 }, { "epoch": 1.92, "learning_rate": 6.099532940178306e-06, "loss": 0.0038, "step": 8163 }, { "epoch": 1.92, "learning_rate": 6.097199439266039e-06, "loss": 0.005, "step": 8164 }, { "epoch": 1.92, "learning_rate": 6.094866189029892e-06, "loss": 0.0118, "step": 8165 }, { "epoch": 1.92, "learning_rate": 6.092533189619734e-06, "loss": 0.0021, "step": 8166 }, { "epoch": 1.92, "learning_rate": 6.090200441185405e-06, "loss": 0.0074, "step": 8167 }, { "epoch": 1.92, "learning_rate": 6.087867943876746e-06, "loss": 0.0072, "step": 8168 }, { "epoch": 1.92, "learning_rate": 6.0855356978435675e-06, "loss": 0.0344, "step": 8169 }, { "epoch": 1.92, "learning_rate": 6.083203703235671e-06, "loss": 0.0267, "step": 8170 }, { "epoch": 1.92, "learning_rate": 6.080871960202837e-06, "loss": 0.012, "step": 8171 }, { "epoch": 1.92, "learning_rate": 6.078540468894834e-06, "loss": 0.0308, "step": 8172 }, { "epoch": 1.92, "learning_rate": 6.07620922946142e-06, "loss": 0.0107, "step": 8173 }, { "epoch": 1.92, "learning_rate": 6.07387824205232e-06, "loss": 0.0426, "step": 8174 }, { "epoch": 1.92, "learning_rate": 6.071547506817263e-06, "loss": 0.0073, "step": 8175 }, { "epoch": 1.92, "learning_rate": 6.06921702390594e-06, "loss": 0.0461, "step": 8176 }, { "epoch": 1.92, "learning_rate": 6.066886793468049e-06, "loss": 0.0433, "step": 8177 }, { "epoch": 1.92, "learning_rate": 6.064556815653249e-06, "loss": 0.0045, "step": 8178 }, { "epoch": 1.92, "learning_rate": 6.062227090611202e-06, "loss": 0.01, "step": 8179 }, { "epoch": 1.92, "learning_rate": 6.059897618491546e-06, "loss": 0.001, "step": 8180 }, { "epoch": 1.92, "learning_rate": 6.057568399443898e-06, "loss": 0.009, "step": 8181 }, { "epoch": 1.92, "learning_rate": 6.055239433617868e-06, "loss": 0.0467, "step": 8182 }, { "epoch": 1.92, "learning_rate": 6.05291072116304e-06, "loss": 0.0023, "step": 8183 }, { "epoch": 1.92, "learning_rate": 6.050582262228992e-06, "loss": 0.0118, "step": 8184 }, { "epoch": 1.92, "learning_rate": 6.048254056965275e-06, "loss": 0.0027, "step": 8185 }, { "epoch": 1.92, "learning_rate": 6.045926105521434e-06, "loss": 0.0529, "step": 8186 }, { "epoch": 1.92, "learning_rate": 6.043598408046989e-06, "loss": 0.0305, "step": 8187 }, { "epoch": 1.92, "learning_rate": 6.0412709646914484e-06, "loss": 0.0267, "step": 8188 }, { "epoch": 1.92, "learning_rate": 6.038943775604307e-06, "loss": 0.0296, "step": 8189 }, { "epoch": 1.92, "learning_rate": 6.036616840935034e-06, "loss": 0.0057, "step": 8190 }, { "epoch": 1.92, "learning_rate": 6.0342901608330966e-06, "loss": 0.0422, "step": 8191 }, { "epoch": 1.92, "learning_rate": 6.031963735447924e-06, "loss": 0.026, "step": 8192 }, { "epoch": 1.92, "learning_rate": 6.0296375649289564e-06, "loss": 0.0007, "step": 8193 }, { "epoch": 1.92, "learning_rate": 6.027311649425591e-06, "loss": 0.0031, "step": 8194 }, { "epoch": 1.92, "learning_rate": 6.024985989087232e-06, "loss": 0.0127, "step": 8195 }, { "epoch": 1.92, "learning_rate": 6.022660584063245e-06, "loss": 0.0261, "step": 8196 }, { "epoch": 1.92, "learning_rate": 6.020335434502996e-06, "loss": 0.0058, "step": 8197 }, { "epoch": 1.92, "learning_rate": 6.018010540555832e-06, "loss": 0.0904, "step": 8198 }, { "epoch": 1.92, "learning_rate": 6.015685902371075e-06, "loss": 0.0065, "step": 8199 }, { "epoch": 1.92, "learning_rate": 6.01336152009804e-06, "loss": 0.0087, "step": 8200 }, { "epoch": 1.92, "learning_rate": 6.011037393886017e-06, "loss": 0.0062, "step": 8201 }, { "epoch": 1.92, "learning_rate": 6.0087135238842895e-06, "loss": 0.0643, "step": 8202 }, { "epoch": 1.93, "learning_rate": 6.006389910242114e-06, "loss": 0.0104, "step": 8203 }, { "epoch": 1.93, "learning_rate": 6.004066553108742e-06, "loss": 0.0084, "step": 8204 }, { "epoch": 1.93, "learning_rate": 6.001743452633395e-06, "loss": 0.0036, "step": 8205 }, { "epoch": 1.93, "learning_rate": 5.9994206089652905e-06, "loss": 0.0009, "step": 8206 }, { "epoch": 1.93, "learning_rate": 5.997098022253625e-06, "loss": 0.0153, "step": 8207 }, { "epoch": 1.93, "learning_rate": 5.9947756926475695e-06, "loss": 0.0023, "step": 8208 }, { "epoch": 1.93, "learning_rate": 5.9924536202963e-06, "loss": 0.0359, "step": 8209 }, { "epoch": 1.93, "learning_rate": 5.99013180534895e-06, "loss": 0.0091, "step": 8210 }, { "epoch": 1.93, "learning_rate": 5.9878102479546604e-06, "loss": 0.016, "step": 8211 }, { "epoch": 1.93, "learning_rate": 5.9854889482625335e-06, "loss": 0.0105, "step": 8212 }, { "epoch": 1.93, "learning_rate": 5.983167906421677e-06, "loss": 0.0197, "step": 8213 }, { "epoch": 1.93, "learning_rate": 5.980847122581158e-06, "loss": 0.002, "step": 8214 }, { "epoch": 1.93, "learning_rate": 5.978526596890051e-06, "loss": 0.063, "step": 8215 }, { "epoch": 1.93, "learning_rate": 5.976206329497399e-06, "loss": 0.0279, "step": 8216 }, { "epoch": 1.93, "learning_rate": 5.9738863205522325e-06, "loss": 0.0139, "step": 8217 }, { "epoch": 1.93, "learning_rate": 5.971566570203566e-06, "loss": 0.0013, "step": 8218 }, { "epoch": 1.93, "learning_rate": 5.969247078600394e-06, "loss": 0.0171, "step": 8219 }, { "epoch": 1.93, "learning_rate": 5.966927845891701e-06, "loss": 0.0658, "step": 8220 }, { "epoch": 1.93, "learning_rate": 5.964608872226447e-06, "loss": 0.0077, "step": 8221 }, { "epoch": 1.93, "learning_rate": 5.962290157753581e-06, "loss": 0.0152, "step": 8222 }, { "epoch": 1.93, "learning_rate": 5.959971702622034e-06, "loss": 0.0821, "step": 8223 }, { "epoch": 1.93, "learning_rate": 5.957653506980719e-06, "loss": 0.0757, "step": 8224 }, { "epoch": 1.93, "learning_rate": 5.9553355709785344e-06, "loss": 0.0076, "step": 8225 }, { "epoch": 1.93, "learning_rate": 5.953017894764357e-06, "loss": 0.0177, "step": 8226 }, { "epoch": 1.93, "learning_rate": 5.950700478487059e-06, "loss": 0.0053, "step": 8227 }, { "epoch": 1.93, "learning_rate": 5.9483833222954755e-06, "loss": 0.0055, "step": 8228 }, { "epoch": 1.93, "learning_rate": 5.94606642633845e-06, "loss": 0.0027, "step": 8229 }, { "epoch": 1.93, "learning_rate": 5.943749790764786e-06, "loss": 0.0154, "step": 8230 }, { "epoch": 1.93, "learning_rate": 5.941433415723286e-06, "loss": 0.0219, "step": 8231 }, { "epoch": 1.93, "learning_rate": 5.93911730136273e-06, "loss": 0.0218, "step": 8232 }, { "epoch": 1.93, "learning_rate": 5.936801447831879e-06, "loss": 0.0012, "step": 8233 }, { "epoch": 1.93, "learning_rate": 5.934485855279483e-06, "loss": 0.0201, "step": 8234 }, { "epoch": 1.93, "learning_rate": 5.932170523854268e-06, "loss": 0.0036, "step": 8235 }, { "epoch": 1.93, "learning_rate": 5.929855453704953e-06, "loss": 0.0209, "step": 8236 }, { "epoch": 1.93, "learning_rate": 5.927540644980229e-06, "loss": 0.0014, "step": 8237 }, { "epoch": 1.93, "learning_rate": 5.925226097828779e-06, "loss": 0.0008, "step": 8238 }, { "epoch": 1.93, "learning_rate": 5.922911812399263e-06, "loss": 0.0255, "step": 8239 }, { "epoch": 1.93, "learning_rate": 5.92059778884033e-06, "loss": 0.0007, "step": 8240 }, { "epoch": 1.93, "learning_rate": 5.918284027300609e-06, "loss": 0.0178, "step": 8241 }, { "epoch": 1.93, "learning_rate": 5.915970527928709e-06, "loss": 0.0188, "step": 8242 }, { "epoch": 1.93, "learning_rate": 5.913657290873232e-06, "loss": 0.0446, "step": 8243 }, { "epoch": 1.93, "learning_rate": 5.911344316282749e-06, "loss": 0.0087, "step": 8244 }, { "epoch": 1.94, "learning_rate": 5.90903160430583e-06, "loss": 0.0103, "step": 8245 }, { "epoch": 1.94, "learning_rate": 5.906719155091012e-06, "loss": 0.0201, "step": 8246 }, { "epoch": 1.94, "learning_rate": 5.904406968786833e-06, "loss": 0.0265, "step": 8247 }, { "epoch": 1.94, "learning_rate": 5.902095045541791e-06, "loss": 0.0035, "step": 8248 }, { "epoch": 1.94, "learning_rate": 5.899783385504392e-06, "loss": 0.0039, "step": 8249 }, { "epoch": 1.94, "learning_rate": 5.897471988823109e-06, "loss": 0.0302, "step": 8250 }, { "epoch": 1.94, "learning_rate": 5.895160855646402e-06, "loss": 0.0111, "step": 8251 }, { "epoch": 1.94, "learning_rate": 5.892849986122718e-06, "loss": 0.0012, "step": 8252 }, { "epoch": 1.94, "learning_rate": 5.890539380400479e-06, "loss": 0.0235, "step": 8253 }, { "epoch": 1.94, "learning_rate": 5.888229038628097e-06, "loss": 0.0427, "step": 8254 }, { "epoch": 1.94, "learning_rate": 5.885918960953965e-06, "loss": 0.0032, "step": 8255 }, { "epoch": 1.94, "learning_rate": 5.883609147526458e-06, "loss": 0.0024, "step": 8256 }, { "epoch": 1.94, "learning_rate": 5.881299598493937e-06, "loss": 0.0086, "step": 8257 }, { "epoch": 1.94, "learning_rate": 5.87899031400474e-06, "loss": 0.1033, "step": 8258 }, { "epoch": 1.94, "learning_rate": 5.876681294207197e-06, "loss": 0.0276, "step": 8259 }, { "epoch": 1.94, "learning_rate": 5.874372539249611e-06, "loss": 0.0004, "step": 8260 }, { "epoch": 1.94, "learning_rate": 5.872064049280277e-06, "loss": 0.0173, "step": 8261 }, { "epoch": 1.94, "learning_rate": 5.8697558244474654e-06, "loss": 0.0021, "step": 8262 }, { "epoch": 1.94, "learning_rate": 5.867447864899436e-06, "loss": 0.0016, "step": 8263 }, { "epoch": 1.94, "learning_rate": 5.865140170784423e-06, "loss": 0.0192, "step": 8264 }, { "epoch": 1.94, "learning_rate": 5.862832742250654e-06, "loss": 0.0082, "step": 8265 }, { "epoch": 1.94, "learning_rate": 5.860525579446338e-06, "loss": 0.0481, "step": 8266 }, { "epoch": 1.94, "learning_rate": 5.858218682519656e-06, "loss": 0.0031, "step": 8267 }, { "epoch": 1.94, "learning_rate": 5.855912051618786e-06, "loss": 0.0024, "step": 8268 }, { "epoch": 1.94, "learning_rate": 5.853605686891876e-06, "loss": 0.0511, "step": 8269 }, { "epoch": 1.94, "learning_rate": 5.851299588487072e-06, "loss": 0.0035, "step": 8270 }, { "epoch": 1.94, "learning_rate": 5.848993756552486e-06, "loss": 0.0017, "step": 8271 }, { "epoch": 1.94, "learning_rate": 5.846688191236225e-06, "loss": 0.0365, "step": 8272 }, { "epoch": 1.94, "learning_rate": 5.844382892686373e-06, "loss": 0.0417, "step": 8273 }, { "epoch": 1.94, "learning_rate": 5.8420778610509986e-06, "loss": 0.0411, "step": 8274 }, { "epoch": 1.94, "learning_rate": 5.83977309647816e-06, "loss": 0.0326, "step": 8275 }, { "epoch": 1.94, "learning_rate": 5.837468599115883e-06, "loss": 0.0041, "step": 8276 }, { "epoch": 1.94, "learning_rate": 5.8351643691121915e-06, "loss": 0.0277, "step": 8277 }, { "epoch": 1.94, "learning_rate": 5.832860406615079e-06, "loss": 0.0042, "step": 8278 }, { "epoch": 1.94, "learning_rate": 5.830556711772537e-06, "loss": 0.0445, "step": 8279 }, { "epoch": 1.94, "learning_rate": 5.828253284732526e-06, "loss": 0.0045, "step": 8280 }, { "epoch": 1.94, "learning_rate": 5.825950125642997e-06, "loss": 0.016, "step": 8281 }, { "epoch": 1.94, "learning_rate": 5.823647234651875e-06, "loss": 0.0267, "step": 8282 }, { "epoch": 1.94, "learning_rate": 5.821344611907085e-06, "loss": 0.0076, "step": 8283 }, { "epoch": 1.94, "learning_rate": 5.8190422575565176e-06, "loss": 0.0101, "step": 8284 }, { "epoch": 1.94, "learning_rate": 5.81674017174805e-06, "loss": 0.0313, "step": 8285 }, { "epoch": 1.94, "learning_rate": 5.814438354629553e-06, "loss": 0.0317, "step": 8286 }, { "epoch": 1.94, "learning_rate": 5.812136806348865e-06, "loss": 0.0039, "step": 8287 }, { "epoch": 1.95, "learning_rate": 5.809835527053817e-06, "loss": 0.0506, "step": 8288 }, { "epoch": 1.95, "learning_rate": 5.807534516892214e-06, "loss": 0.0027, "step": 8289 }, { "epoch": 1.95, "learning_rate": 5.805233776011858e-06, "loss": 0.0215, "step": 8290 }, { "epoch": 1.95, "learning_rate": 5.802933304560519e-06, "loss": 0.0207, "step": 8291 }, { "epoch": 1.95, "learning_rate": 5.800633102685961e-06, "loss": 0.0078, "step": 8292 }, { "epoch": 1.95, "learning_rate": 5.7983331705359235e-06, "loss": 0.0609, "step": 8293 }, { "epoch": 1.95, "learning_rate": 5.796033508258126e-06, "loss": 0.0142, "step": 8294 }, { "epoch": 1.95, "learning_rate": 5.793734116000281e-06, "loss": 0.0445, "step": 8295 }, { "epoch": 1.95, "learning_rate": 5.791434993910078e-06, "loss": 0.0018, "step": 8296 }, { "epoch": 1.95, "learning_rate": 5.7891361421351875e-06, "loss": 0.0192, "step": 8297 }, { "epoch": 1.95, "learning_rate": 5.786837560823259e-06, "loss": 0.0022, "step": 8298 }, { "epoch": 1.95, "learning_rate": 5.784539250121941e-06, "loss": 0.0425, "step": 8299 }, { "epoch": 1.95, "learning_rate": 5.782241210178847e-06, "loss": 0.0039, "step": 8300 }, { "epoch": 1.95, "learning_rate": 5.7799434411415755e-06, "loss": 0.0144, "step": 8301 }, { "epoch": 1.95, "learning_rate": 5.777645943157722e-06, "loss": 0.0227, "step": 8302 }, { "epoch": 1.95, "learning_rate": 5.775348716374848e-06, "loss": 0.0288, "step": 8303 }, { "epoch": 1.95, "learning_rate": 5.773051760940505e-06, "loss": 0.0011, "step": 8304 }, { "epoch": 1.95, "learning_rate": 5.770755077002221e-06, "loss": 0.0211, "step": 8305 }, { "epoch": 1.95, "learning_rate": 5.768458664707522e-06, "loss": 0.0208, "step": 8306 }, { "epoch": 1.95, "learning_rate": 5.766162524203901e-06, "loss": 0.0275, "step": 8307 }, { "epoch": 1.95, "learning_rate": 5.763866655638834e-06, "loss": 0.0034, "step": 8308 }, { "epoch": 1.95, "learning_rate": 5.761571059159792e-06, "loss": 0.0296, "step": 8309 }, { "epoch": 1.95, "learning_rate": 5.7592757349142115e-06, "loss": 0.0298, "step": 8310 }, { "epoch": 1.95, "learning_rate": 5.756980683049532e-06, "loss": 0.0271, "step": 8311 }, { "epoch": 1.95, "learning_rate": 5.754685903713159e-06, "loss": 0.0134, "step": 8312 }, { "epoch": 1.95, "learning_rate": 5.752391397052485e-06, "loss": 0.0119, "step": 8313 }, { "epoch": 1.95, "learning_rate": 5.750097163214881e-06, "loss": 0.0033, "step": 8314 }, { "epoch": 1.95, "learning_rate": 5.7478032023477145e-06, "loss": 0.0083, "step": 8315 }, { "epoch": 1.95, "learning_rate": 5.7455095145983245e-06, "loss": 0.0033, "step": 8316 }, { "epoch": 1.95, "learning_rate": 5.743216100114024e-06, "loss": 0.0005, "step": 8317 }, { "epoch": 1.95, "learning_rate": 5.740922959042133e-06, "loss": 0.0076, "step": 8318 }, { "epoch": 1.95, "learning_rate": 5.7386300915299305e-06, "loss": 0.0185, "step": 8319 }, { "epoch": 1.95, "learning_rate": 5.736337497724689e-06, "loss": 0.0393, "step": 8320 }, { "epoch": 1.95, "learning_rate": 5.734045177773659e-06, "loss": 0.0032, "step": 8321 }, { "epoch": 1.95, "learning_rate": 5.73175313182408e-06, "loss": 0.0015, "step": 8322 }, { "epoch": 1.95, "learning_rate": 5.729461360023169e-06, "loss": 0.0008, "step": 8323 }, { "epoch": 1.95, "learning_rate": 5.727169862518127e-06, "loss": 0.0002, "step": 8324 }, { "epoch": 1.95, "learning_rate": 5.724878639456128e-06, "loss": 0.0241, "step": 8325 }, { "epoch": 1.95, "learning_rate": 5.722587690984342e-06, "loss": 0.0006, "step": 8326 }, { "epoch": 1.95, "learning_rate": 5.720297017249924e-06, "loss": 0.0721, "step": 8327 }, { "epoch": 1.95, "learning_rate": 5.718006618399996e-06, "loss": 0.0013, "step": 8328 }, { "epoch": 1.95, "learning_rate": 5.71571649458167e-06, "loss": 0.0797, "step": 8329 }, { "epoch": 1.95, "learning_rate": 5.713426645942037e-06, "loss": 0.0334, "step": 8330 }, { "epoch": 1.96, "learning_rate": 5.7111370726281826e-06, "loss": 0.0133, "step": 8331 }, { "epoch": 1.96, "learning_rate": 5.708847774787158e-06, "loss": 0.004, "step": 8332 }, { "epoch": 1.96, "learning_rate": 5.7065587525660094e-06, "loss": 0.0438, "step": 8333 }, { "epoch": 1.96, "learning_rate": 5.704270006111753e-06, "loss": 0.0221, "step": 8334 }, { "epoch": 1.96, "learning_rate": 5.701981535571404e-06, "loss": 0.0012, "step": 8335 }, { "epoch": 1.96, "learning_rate": 5.699693341091943e-06, "loss": 0.0542, "step": 8336 }, { "epoch": 1.96, "learning_rate": 5.6974054228203415e-06, "loss": 0.0004, "step": 8337 }, { "epoch": 1.96, "learning_rate": 5.695117780903556e-06, "loss": 0.0051, "step": 8338 }, { "epoch": 1.96, "learning_rate": 5.692830415488519e-06, "loss": 0.0252, "step": 8339 }, { "epoch": 1.96, "learning_rate": 5.690543326722149e-06, "loss": 0.0004, "step": 8340 }, { "epoch": 1.96, "learning_rate": 5.688256514751338e-06, "loss": 0.0065, "step": 8341 }, { "epoch": 1.96, "learning_rate": 5.685969979722974e-06, "loss": 0.0008, "step": 8342 }, { "epoch": 1.96, "learning_rate": 5.683683721783927e-06, "loss": 0.0279, "step": 8343 }, { "epoch": 1.96, "learning_rate": 5.681397741081027e-06, "loss": 0.12, "step": 8344 }, { "epoch": 1.96, "learning_rate": 5.679112037761117e-06, "loss": 0.0501, "step": 8345 }, { "epoch": 1.96, "learning_rate": 5.676826611970996e-06, "loss": 0.0155, "step": 8346 }, { "epoch": 1.96, "learning_rate": 5.674541463857465e-06, "loss": 0.0287, "step": 8347 }, { "epoch": 1.96, "learning_rate": 5.6722565935672955e-06, "loss": 0.0354, "step": 8348 }, { "epoch": 1.96, "learning_rate": 5.669972001247243e-06, "loss": 0.0111, "step": 8349 }, { "epoch": 1.96, "learning_rate": 5.667687687044045e-06, "loss": 0.0184, "step": 8350 }, { "epoch": 1.96, "learning_rate": 5.665403651104427e-06, "loss": 0.0128, "step": 8351 }, { "epoch": 1.96, "learning_rate": 5.663119893575092e-06, "loss": 0.0018, "step": 8352 }, { "epoch": 1.96, "learning_rate": 5.660836414602718e-06, "loss": 0.01, "step": 8353 }, { "epoch": 1.96, "learning_rate": 5.658553214333983e-06, "loss": 0.0214, "step": 8354 }, { "epoch": 1.96, "learning_rate": 5.656270292915531e-06, "loss": 0.0107, "step": 8355 }, { "epoch": 1.96, "learning_rate": 5.6539876504939925e-06, "loss": 0.0058, "step": 8356 }, { "epoch": 1.96, "learning_rate": 5.6517052872159805e-06, "loss": 0.0343, "step": 8357 }, { "epoch": 1.96, "learning_rate": 5.649423203228095e-06, "loss": 0.0018, "step": 8358 }, { "epoch": 1.96, "learning_rate": 5.647141398676913e-06, "loss": 0.0049, "step": 8359 }, { "epoch": 1.96, "learning_rate": 5.644859873708989e-06, "loss": 0.0054, "step": 8360 }, { "epoch": 1.96, "learning_rate": 5.6425786284708715e-06, "loss": 0.0315, "step": 8361 }, { "epoch": 1.96, "learning_rate": 5.64029766310908e-06, "loss": 0.0061, "step": 8362 }, { "epoch": 1.96, "learning_rate": 5.638016977770124e-06, "loss": 0.0072, "step": 8363 }, { "epoch": 1.96, "learning_rate": 5.635736572600492e-06, "loss": 0.0422, "step": 8364 }, { "epoch": 1.96, "learning_rate": 5.63345644774665e-06, "loss": 0.054, "step": 8365 }, { "epoch": 1.96, "learning_rate": 5.631176603355049e-06, "loss": 0.0167, "step": 8366 }, { "epoch": 1.96, "learning_rate": 5.628897039572129e-06, "loss": 0.0185, "step": 8367 }, { "epoch": 1.96, "learning_rate": 5.626617756544303e-06, "loss": 0.0246, "step": 8368 }, { "epoch": 1.96, "learning_rate": 5.624338754417964e-06, "loss": 0.0023, "step": 8369 }, { "epoch": 1.96, "learning_rate": 5.6220600333395e-06, "loss": 0.0157, "step": 8370 }, { "epoch": 1.96, "learning_rate": 5.6197815934552705e-06, "loss": 0.0018, "step": 8371 }, { "epoch": 1.96, "learning_rate": 5.617503434911618e-06, "loss": 0.0053, "step": 8372 }, { "epoch": 1.97, "learning_rate": 5.615225557854862e-06, "loss": 0.0177, "step": 8373 }, { "epoch": 1.97, "learning_rate": 5.612947962431323e-06, "loss": 0.0357, "step": 8374 }, { "epoch": 1.97, "learning_rate": 5.610670648787282e-06, "loss": 0.0025, "step": 8375 }, { "epoch": 1.97, "learning_rate": 5.608393617069012e-06, "loss": 0.0057, "step": 8376 }, { "epoch": 1.97, "learning_rate": 5.606116867422764e-06, "loss": 0.0106, "step": 8377 }, { "epoch": 1.97, "learning_rate": 5.603840399994775e-06, "loss": 0.023, "step": 8378 }, { "epoch": 1.97, "learning_rate": 5.601564214931271e-06, "loss": 0.0539, "step": 8379 }, { "epoch": 1.97, "learning_rate": 5.599288312378435e-06, "loss": 0.0084, "step": 8380 }, { "epoch": 1.97, "learning_rate": 5.597012692482457e-06, "loss": 0.0298, "step": 8381 }, { "epoch": 1.97, "learning_rate": 5.594737355389497e-06, "loss": 0.0159, "step": 8382 }, { "epoch": 1.97, "learning_rate": 5.592462301245703e-06, "loss": 0.0038, "step": 8383 }, { "epoch": 1.97, "learning_rate": 5.5901875301972e-06, "loss": 0.005, "step": 8384 }, { "epoch": 1.97, "learning_rate": 5.58791304239009e-06, "loss": 0.0036, "step": 8385 }, { "epoch": 1.97, "learning_rate": 5.585638837970471e-06, "loss": 0.0136, "step": 8386 }, { "epoch": 1.97, "learning_rate": 5.583364917084413e-06, "loss": 0.0045, "step": 8387 }, { "epoch": 1.97, "learning_rate": 5.581091279877966e-06, "loss": 0.0063, "step": 8388 }, { "epoch": 1.97, "learning_rate": 5.578817926497165e-06, "loss": 0.0111, "step": 8389 }, { "epoch": 1.97, "learning_rate": 5.576544857088033e-06, "loss": 0.0005, "step": 8390 }, { "epoch": 1.97, "learning_rate": 5.574272071796565e-06, "loss": 0.0019, "step": 8391 }, { "epoch": 1.97, "learning_rate": 5.571999570768743e-06, "loss": 0.0189, "step": 8392 }, { "epoch": 1.97, "learning_rate": 5.569727354150523e-06, "loss": 0.0266, "step": 8393 }, { "epoch": 1.97, "learning_rate": 5.567455422087853e-06, "loss": 0.0137, "step": 8394 }, { "epoch": 1.97, "learning_rate": 5.565183774726668e-06, "loss": 0.0348, "step": 8395 }, { "epoch": 1.97, "learning_rate": 5.5629124122128605e-06, "loss": 0.0024, "step": 8396 }, { "epoch": 1.97, "learning_rate": 5.560641334692329e-06, "loss": 0.0404, "step": 8397 }, { "epoch": 1.97, "learning_rate": 5.55837054231094e-06, "loss": 0.0088, "step": 8398 }, { "epoch": 1.97, "learning_rate": 5.55610003521455e-06, "loss": 0.0209, "step": 8399 }, { "epoch": 1.97, "learning_rate": 5.553829813548992e-06, "loss": 0.0249, "step": 8400 }, { "epoch": 1.97, "learning_rate": 5.55155987746008e-06, "loss": 0.0007, "step": 8401 }, { "epoch": 1.97, "learning_rate": 5.549290227093609e-06, "loss": 0.0364, "step": 8402 }, { "epoch": 1.97, "learning_rate": 5.547020862595367e-06, "loss": 0.0008, "step": 8403 }, { "epoch": 1.97, "learning_rate": 5.544751784111107e-06, "loss": 0.0024, "step": 8404 }, { "epoch": 1.97, "learning_rate": 5.542482991786573e-06, "loss": 0.0472, "step": 8405 }, { "epoch": 1.97, "learning_rate": 5.540214485767492e-06, "loss": 0.0377, "step": 8406 }, { "epoch": 1.97, "learning_rate": 5.5379462661995684e-06, "loss": 0.0019, "step": 8407 }, { "epoch": 1.97, "learning_rate": 5.535678333228489e-06, "loss": 0.0139, "step": 8408 }, { "epoch": 1.97, "learning_rate": 5.533410686999918e-06, "loss": 0.0004, "step": 8409 }, { "epoch": 1.97, "learning_rate": 5.531143327659516e-06, "loss": 0.0118, "step": 8410 }, { "epoch": 1.97, "learning_rate": 5.528876255352908e-06, "loss": 0.0587, "step": 8411 }, { "epoch": 1.97, "learning_rate": 5.526609470225706e-06, "loss": 0.0321, "step": 8412 }, { "epoch": 1.97, "learning_rate": 5.524342972423513e-06, "loss": 0.0509, "step": 8413 }, { "epoch": 1.97, "learning_rate": 5.522076762091896e-06, "loss": 0.0015, "step": 8414 }, { "epoch": 1.97, "learning_rate": 5.519810839376426e-06, "loss": 0.0095, "step": 8415 }, { "epoch": 1.98, "learning_rate": 5.517545204422627e-06, "loss": 0.0069, "step": 8416 }, { "epoch": 1.98, "learning_rate": 5.515279857376035e-06, "loss": 0.0041, "step": 8417 }, { "epoch": 1.98, "learning_rate": 5.513014798382139e-06, "loss": 0.003, "step": 8418 }, { "epoch": 1.98, "learning_rate": 5.5107500275864356e-06, "loss": 0.0038, "step": 8419 }, { "epoch": 1.98, "learning_rate": 5.5084855451343854e-06, "loss": 0.0034, "step": 8420 }, { "epoch": 1.98, "learning_rate": 5.506221351171433e-06, "loss": 0.0124, "step": 8421 }, { "epoch": 1.98, "learning_rate": 5.503957445843011e-06, "loss": 0.0768, "step": 8422 }, { "epoch": 1.98, "learning_rate": 5.501693829294531e-06, "loss": 0.0001, "step": 8423 }, { "epoch": 1.98, "learning_rate": 5.499430501671381e-06, "loss": 0.0014, "step": 8424 }, { "epoch": 1.98, "learning_rate": 5.497167463118932e-06, "loss": 0.021, "step": 8425 }, { "epoch": 1.98, "learning_rate": 5.494904713782545e-06, "loss": 0.0526, "step": 8426 }, { "epoch": 1.98, "learning_rate": 5.492642253807553e-06, "loss": 0.0007, "step": 8427 }, { "epoch": 1.98, "learning_rate": 5.490380083339269e-06, "loss": 0.0117, "step": 8428 }, { "epoch": 1.98, "learning_rate": 5.488118202522999e-06, "loss": 0.0061, "step": 8429 }, { "epoch": 1.98, "learning_rate": 5.4858566115040165e-06, "loss": 0.0762, "step": 8430 }, { "epoch": 1.98, "learning_rate": 5.483595310427596e-06, "loss": 0.0048, "step": 8431 }, { "epoch": 1.98, "learning_rate": 5.481334299438961e-06, "loss": 0.0014, "step": 8432 }, { "epoch": 1.98, "learning_rate": 5.479073578683352e-06, "loss": 0.0018, "step": 8433 }, { "epoch": 1.98, "learning_rate": 5.4768131483059615e-06, "loss": 0.0181, "step": 8434 }, { "epoch": 1.98, "learning_rate": 5.474553008451993e-06, "loss": 0.0053, "step": 8435 }, { "epoch": 1.98, "learning_rate": 5.472293159266597e-06, "loss": 0.0457, "step": 8436 }, { "epoch": 1.98, "learning_rate": 5.470033600894932e-06, "loss": 0.0018, "step": 8437 }, { "epoch": 1.98, "learning_rate": 5.467774333482132e-06, "loss": 0.0134, "step": 8438 }, { "epoch": 1.98, "learning_rate": 5.465515357173306e-06, "loss": 0.0779, "step": 8439 }, { "epoch": 1.98, "learning_rate": 5.463256672113546e-06, "loss": 0.022, "step": 8440 }, { "epoch": 1.98, "learning_rate": 5.460998278447927e-06, "loss": 0.0118, "step": 8441 }, { "epoch": 1.98, "learning_rate": 5.458740176321508e-06, "loss": 0.029, "step": 8442 }, { "epoch": 1.98, "learning_rate": 5.456482365879326e-06, "loss": 0.0103, "step": 8443 }, { "epoch": 1.98, "learning_rate": 5.454224847266398e-06, "loss": 0.0129, "step": 8444 }, { "epoch": 1.98, "learning_rate": 5.45196762062772e-06, "loss": 0.0066, "step": 8445 }, { "epoch": 1.98, "learning_rate": 5.449710686108279e-06, "loss": 0.0553, "step": 8446 }, { "epoch": 1.98, "learning_rate": 5.4474540438530445e-06, "loss": 0.0174, "step": 8447 }, { "epoch": 1.98, "learning_rate": 5.4451976940069426e-06, "loss": 0.0202, "step": 8448 }, { "epoch": 1.98, "learning_rate": 5.4429416367149134e-06, "loss": 0.0331, "step": 8449 }, { "epoch": 1.98, "learning_rate": 5.440685872121852e-06, "loss": 0.0066, "step": 8450 }, { "epoch": 1.98, "learning_rate": 5.438430400372659e-06, "loss": 0.0323, "step": 8451 }, { "epoch": 1.98, "learning_rate": 5.4361752216121875e-06, "loss": 0.0013, "step": 8452 }, { "epoch": 1.98, "learning_rate": 5.433920335985299e-06, "loss": 0.0723, "step": 8453 }, { "epoch": 1.98, "learning_rate": 5.4316657436368146e-06, "loss": 0.0266, "step": 8454 }, { "epoch": 1.98, "learning_rate": 5.4294114447115564e-06, "loss": 0.0283, "step": 8455 }, { "epoch": 1.98, "learning_rate": 5.427157439354313e-06, "loss": 0.0014, "step": 8456 }, { "epoch": 1.98, "learning_rate": 5.4249037277098534e-06, "loss": 0.022, "step": 8457 }, { "epoch": 1.99, "learning_rate": 5.422650309922943e-06, "loss": 0.0107, "step": 8458 }, { "epoch": 1.99, "learning_rate": 5.420397186138313e-06, "loss": 0.0019, "step": 8459 }, { "epoch": 1.99, "learning_rate": 5.418144356500683e-06, "loss": 0.0398, "step": 8460 }, { "epoch": 1.99, "learning_rate": 5.415891821154746e-06, "loss": 0.0051, "step": 8461 }, { "epoch": 1.99, "learning_rate": 5.41363958024519e-06, "loss": 0.0001, "step": 8462 }, { "epoch": 1.99, "learning_rate": 5.4113876339166725e-06, "loss": 0.002, "step": 8463 }, { "epoch": 1.99, "learning_rate": 5.4091359823138315e-06, "loss": 0.0411, "step": 8464 }, { "epoch": 1.99, "learning_rate": 5.406884625581299e-06, "loss": 0.0143, "step": 8465 }, { "epoch": 1.99, "learning_rate": 5.404633563863669e-06, "loss": 0.0169, "step": 8466 }, { "epoch": 1.99, "learning_rate": 5.402382797305541e-06, "loss": 0.0065, "step": 8467 }, { "epoch": 1.99, "learning_rate": 5.400132326051464e-06, "loss": 0.024, "step": 8468 }, { "epoch": 1.99, "learning_rate": 5.397882150245998e-06, "loss": 0.009, "step": 8469 }, { "epoch": 1.99, "learning_rate": 5.395632270033663e-06, "loss": 0.0012, "step": 8470 }, { "epoch": 1.99, "learning_rate": 5.393382685558976e-06, "loss": 0.0177, "step": 8471 }, { "epoch": 1.99, "learning_rate": 5.3911333969664235e-06, "loss": 0.0211, "step": 8472 }, { "epoch": 1.99, "learning_rate": 5.388884404400474e-06, "loss": 0.0006, "step": 8473 }, { "epoch": 1.99, "learning_rate": 5.386635708005588e-06, "loss": 0.0026, "step": 8474 }, { "epoch": 1.99, "learning_rate": 5.3843873079261935e-06, "loss": 0.0384, "step": 8475 }, { "epoch": 1.99, "learning_rate": 5.382139204306703e-06, "loss": 0.0009, "step": 8476 }, { "epoch": 1.99, "learning_rate": 5.379891397291512e-06, "loss": 0.0083, "step": 8477 }, { "epoch": 1.99, "learning_rate": 5.3776438870250015e-06, "loss": 0.0047, "step": 8478 }, { "epoch": 1.99, "learning_rate": 5.375396673651527e-06, "loss": 0.0532, "step": 8479 }, { "epoch": 1.99, "learning_rate": 5.373149757315421e-06, "loss": 0.0163, "step": 8480 }, { "epoch": 1.99, "learning_rate": 5.37090313816101e-06, "loss": 0.0488, "step": 8481 }, { "epoch": 1.99, "learning_rate": 5.368656816332588e-06, "loss": 0.0225, "step": 8482 }, { "epoch": 1.99, "learning_rate": 5.366410791974446e-06, "loss": 0.0147, "step": 8483 }, { "epoch": 1.99, "learning_rate": 5.364165065230831e-06, "loss": 0.0022, "step": 8484 }, { "epoch": 1.99, "learning_rate": 5.361919636245996e-06, "loss": 0.0006, "step": 8485 }, { "epoch": 1.99, "learning_rate": 5.359674505164157e-06, "loss": 0.0048, "step": 8486 }, { "epoch": 1.99, "learning_rate": 5.3574296721295315e-06, "loss": 0.0007, "step": 8487 }, { "epoch": 1.99, "learning_rate": 5.3551851372862876e-06, "loss": 0.0596, "step": 8488 }, { "epoch": 1.99, "learning_rate": 5.3529409007786e-06, "loss": 0.0289, "step": 8489 }, { "epoch": 1.99, "learning_rate": 5.350696962750619e-06, "loss": 0.043, "step": 8490 }, { "epoch": 1.99, "learning_rate": 5.3484533233464686e-06, "loss": 0.0177, "step": 8491 }, { "epoch": 1.99, "learning_rate": 5.346209982710257e-06, "loss": 0.0184, "step": 8492 }, { "epoch": 1.99, "learning_rate": 5.343966940986069e-06, "loss": 0.0209, "step": 8493 }, { "epoch": 1.99, "learning_rate": 5.341724198317985e-06, "loss": 0.0022, "step": 8494 }, { "epoch": 1.99, "learning_rate": 5.33948175485005e-06, "loss": 0.0031, "step": 8495 }, { "epoch": 1.99, "learning_rate": 5.337239610726296e-06, "loss": 0.007, "step": 8496 }, { "epoch": 1.99, "learning_rate": 5.334997766090731e-06, "loss": 0.001, "step": 8497 }, { "epoch": 1.99, "learning_rate": 5.332756221087354e-06, "loss": 0.0002, "step": 8498 }, { "epoch": 1.99, "learning_rate": 5.330514975860145e-06, "loss": 0.0143, "step": 8499 }, { "epoch": 1.99, "learning_rate": 5.328274030553044e-06, "loss": 0.0005, "step": 8500 }, { "epoch": 2.0, "learning_rate": 5.326033385309998e-06, "loss": 0.0115, "step": 8501 }, { "epoch": 2.0, "learning_rate": 5.323793040274916e-06, "loss": 0.0005, "step": 8502 }, { "epoch": 2.0, "learning_rate": 5.321552995591705e-06, "loss": 0.0033, "step": 8503 }, { "epoch": 2.0, "learning_rate": 5.319313251404228e-06, "loss": 0.0033, "step": 8504 }, { "epoch": 2.0, "learning_rate": 5.317073807856352e-06, "loss": 0.006, "step": 8505 }, { "epoch": 2.0, "learning_rate": 5.314834665091918e-06, "loss": 0.0548, "step": 8506 }, { "epoch": 2.0, "learning_rate": 5.312595823254744e-06, "loss": 0.0539, "step": 8507 }, { "epoch": 2.0, "learning_rate": 5.310357282488629e-06, "loss": 0.0136, "step": 8508 }, { "epoch": 2.0, "learning_rate": 5.30811904293735e-06, "loss": 0.0174, "step": 8509 }, { "epoch": 2.0, "learning_rate": 5.305881104744677e-06, "loss": 0.0579, "step": 8510 }, { "epoch": 2.0, "learning_rate": 5.303643468054349e-06, "loss": 0.0155, "step": 8511 }, { "epoch": 2.0, "learning_rate": 5.301406133010087e-06, "loss": 0.0205, "step": 8512 }, { "epoch": 2.0, "learning_rate": 5.299169099755595e-06, "loss": 0.0791, "step": 8513 }, { "epoch": 2.0, "learning_rate": 5.296932368434559e-06, "loss": 0.0042, "step": 8514 }, { "epoch": 2.0, "learning_rate": 5.2946959391906435e-06, "loss": 0.0155, "step": 8515 }, { "epoch": 2.0, "learning_rate": 5.29245981216749e-06, "loss": 0.0382, "step": 8516 }, { "epoch": 2.0, "learning_rate": 5.290223987508731e-06, "loss": 0.0589, "step": 8517 }, { "epoch": 2.0, "learning_rate": 5.287988465357967e-06, "loss": 0.0177, "step": 8518 }, { "epoch": 2.0, "learning_rate": 5.285753245858796e-06, "loss": 0.0606, "step": 8519 }, { "epoch": 2.0, "learning_rate": 5.28351832915477e-06, "loss": 0.0158, "step": 8520 }, { "epoch": 2.0, "learning_rate": 5.281283715389448e-06, "loss": 0.0227, "step": 8521 }, { "epoch": 2.0, "learning_rate": 5.279049404706353e-06, "loss": 0.0292, "step": 8522 }, { "epoch": 2.0, "learning_rate": 5.276815397249e-06, "loss": 0.0239, "step": 8523 }, { "epoch": 2.0, "learning_rate": 5.2745816931608785e-06, "loss": 0.0076, "step": 8524 }, { "epoch": 2.0, "learning_rate": 5.2723482925854494e-06, "loss": 0.1003, "step": 8525 }, { "epoch": 2.0, "learning_rate": 5.270115195666177e-06, "loss": 0.0094, "step": 8526 }, { "epoch": 2.0, "learning_rate": 5.267882402546485e-06, "loss": 0.0664, "step": 8527 }, { "epoch": 2.0, "learning_rate": 5.265649913369788e-06, "loss": 0.0032, "step": 8528 }, { "epoch": 2.0, "learning_rate": 5.263417728279472e-06, "loss": 0.0325, "step": 8529 }, { "epoch": 2.0, "learning_rate": 5.2611858474189185e-06, "loss": 0.0045, "step": 8530 }, { "epoch": 2.0, "learning_rate": 5.258954270931478e-06, "loss": 0.0038, "step": 8531 }, { "epoch": 2.0, "learning_rate": 5.2567229989604775e-06, "loss": 0.0033, "step": 8532 }, { "epoch": 2.0, "learning_rate": 5.254492031649243e-06, "loss": 0.0004, "step": 8533 }, { "epoch": 2.0, "learning_rate": 5.2522613691410586e-06, "loss": 0.0839, "step": 8534 }, { "epoch": 2.0, "learning_rate": 5.250031011579211e-06, "loss": 0.0202, "step": 8535 }, { "epoch": 2.0, "learning_rate": 5.24780095910694e-06, "loss": 0.0546, "step": 8536 }, { "epoch": 2.0, "learning_rate": 5.245571211867495e-06, "loss": 0.1132, "step": 8537 }, { "epoch": 2.0, "learning_rate": 5.243341770004083e-06, "loss": 0.0007, "step": 8538 }, { "epoch": 2.0, "learning_rate": 5.241112633659911e-06, "loss": 0.0125, "step": 8539 }, { "epoch": 2.0, "learning_rate": 5.238883802978143e-06, "loss": 0.0004, "step": 8540 }, { "epoch": 2.0, "learning_rate": 5.236655278101943e-06, "loss": 0.0092, "step": 8541 }, { "epoch": 2.0, "learning_rate": 5.234427059174453e-06, "loss": 0.0655, "step": 8542 }, { "epoch": 2.0, "learning_rate": 5.232199146338786e-06, "loss": 0.0026, "step": 8543 }, { "epoch": 2.01, "learning_rate": 5.229971539738041e-06, "loss": 0.0103, "step": 8544 }, { "epoch": 2.01, "learning_rate": 5.227744239515296e-06, "loss": 0.0349, "step": 8545 }, { "epoch": 2.01, "learning_rate": 5.2255172458136115e-06, "loss": 0.0012, "step": 8546 }, { "epoch": 2.01, "learning_rate": 5.223290558776027e-06, "loss": 0.0032, "step": 8547 }, { "epoch": 2.01, "learning_rate": 5.221064178545559e-06, "loss": 0.1109, "step": 8548 }, { "epoch": 2.01, "learning_rate": 5.218838105265214e-06, "loss": 0.0255, "step": 8549 }, { "epoch": 2.01, "learning_rate": 5.216612339077967e-06, "loss": 0.0061, "step": 8550 }, { "epoch": 2.01, "learning_rate": 5.214386880126781e-06, "loss": 0.0068, "step": 8551 }, { "epoch": 2.01, "learning_rate": 5.21216172855459e-06, "loss": 0.0527, "step": 8552 }, { "epoch": 2.01, "learning_rate": 5.209936884504325e-06, "loss": 0.0092, "step": 8553 }, { "epoch": 2.01, "learning_rate": 5.207712348118881e-06, "loss": 0.0907, "step": 8554 }, { "epoch": 2.01, "learning_rate": 5.205488119541147e-06, "loss": 0.0425, "step": 8555 }, { "epoch": 2.01, "learning_rate": 5.203264198913973e-06, "loss": 0.0248, "step": 8556 }, { "epoch": 2.01, "learning_rate": 5.201040586380206e-06, "loss": 0.0497, "step": 8557 }, { "epoch": 2.01, "learning_rate": 5.198817282082675e-06, "loss": 0.0008, "step": 8558 }, { "epoch": 2.01, "learning_rate": 5.196594286164176e-06, "loss": 0.021, "step": 8559 }, { "epoch": 2.01, "learning_rate": 5.194371598767492e-06, "loss": 0.0021, "step": 8560 }, { "epoch": 2.01, "learning_rate": 5.192149220035383e-06, "loss": 0.0007, "step": 8561 }, { "epoch": 2.01, "learning_rate": 5.1899271501105984e-06, "loss": 0.002, "step": 8562 }, { "epoch": 2.01, "learning_rate": 5.18770538913586e-06, "loss": 0.0126, "step": 8563 }, { "epoch": 2.01, "learning_rate": 5.185483937253868e-06, "loss": 0.009, "step": 8564 }, { "epoch": 2.01, "learning_rate": 5.183262794607302e-06, "loss": 0.0203, "step": 8565 }, { "epoch": 2.01, "learning_rate": 5.181041961338835e-06, "loss": 0.0324, "step": 8566 }, { "epoch": 2.01, "learning_rate": 5.178821437591109e-06, "loss": 0.0037, "step": 8567 }, { "epoch": 2.01, "learning_rate": 5.176601223506738e-06, "loss": 0.0178, "step": 8568 }, { "epoch": 2.01, "learning_rate": 5.174381319228338e-06, "loss": 0.04, "step": 8569 }, { "epoch": 2.01, "learning_rate": 5.172161724898483e-06, "loss": 0.0321, "step": 8570 }, { "epoch": 2.01, "learning_rate": 5.169942440659752e-06, "loss": 0.0023, "step": 8571 }, { "epoch": 2.01, "learning_rate": 5.167723466654671e-06, "loss": 0.0201, "step": 8572 }, { "epoch": 2.01, "learning_rate": 5.165504803025775e-06, "loss": 0.0365, "step": 8573 }, { "epoch": 2.01, "learning_rate": 5.1632864499155635e-06, "loss": 0.0321, "step": 8574 }, { "epoch": 2.01, "learning_rate": 5.161068407466526e-06, "loss": 0.0142, "step": 8575 }, { "epoch": 2.01, "learning_rate": 5.158850675821125e-06, "loss": 0.0395, "step": 8576 }, { "epoch": 2.01, "learning_rate": 5.1566332551218016e-06, "loss": 0.0013, "step": 8577 }, { "epoch": 2.01, "learning_rate": 5.154416145510985e-06, "loss": 0.002, "step": 8578 }, { "epoch": 2.01, "learning_rate": 5.152199347131077e-06, "loss": 0.0218, "step": 8579 }, { "epoch": 2.01, "learning_rate": 5.149982860124465e-06, "loss": 0.0033, "step": 8580 }, { "epoch": 2.01, "learning_rate": 5.147766684633506e-06, "loss": 0.0019, "step": 8581 }, { "epoch": 2.01, "learning_rate": 5.145550820800555e-06, "loss": 0.0026, "step": 8582 }, { "epoch": 2.01, "learning_rate": 5.14333526876793e-06, "loss": 0.0071, "step": 8583 }, { "epoch": 2.01, "learning_rate": 5.141120028677933e-06, "loss": 0.0073, "step": 8584 }, { "epoch": 2.01, "learning_rate": 5.138905100672857e-06, "loss": 0.0307, "step": 8585 }, { "epoch": 2.02, "learning_rate": 5.1366904848949615e-06, "loss": 0.0025, "step": 8586 }, { "epoch": 2.02, "learning_rate": 5.13447618148649e-06, "loss": 0.0681, "step": 8587 }, { "epoch": 2.02, "learning_rate": 5.132262190589666e-06, "loss": 0.0819, "step": 8588 }, { "epoch": 2.02, "learning_rate": 5.130048512346697e-06, "loss": 0.0476, "step": 8589 }, { "epoch": 2.02, "learning_rate": 5.127835146899762e-06, "loss": 0.0427, "step": 8590 }, { "epoch": 2.02, "learning_rate": 5.125622094391034e-06, "loss": 0.0019, "step": 8591 }, { "epoch": 2.02, "learning_rate": 5.123409354962649e-06, "loss": 0.0473, "step": 8592 }, { "epoch": 2.02, "learning_rate": 5.121196928756732e-06, "loss": 0.0309, "step": 8593 }, { "epoch": 2.02, "learning_rate": 5.118984815915391e-06, "loss": 0.0065, "step": 8594 }, { "epoch": 2.02, "learning_rate": 5.116773016580707e-06, "loss": 0.0067, "step": 8595 }, { "epoch": 2.02, "learning_rate": 5.114561530894744e-06, "loss": 0.0095, "step": 8596 }, { "epoch": 2.02, "learning_rate": 5.112350358999538e-06, "loss": 0.0386, "step": 8597 }, { "epoch": 2.02, "learning_rate": 5.110139501037124e-06, "loss": 0.0337, "step": 8598 }, { "epoch": 2.02, "learning_rate": 5.107928957149499e-06, "loss": 0.0149, "step": 8599 }, { "epoch": 2.02, "learning_rate": 5.105718727478644e-06, "loss": 0.012, "step": 8600 }, { "epoch": 2.02, "learning_rate": 5.103508812166526e-06, "loss": 0.036, "step": 8601 }, { "epoch": 2.02, "learning_rate": 5.101299211355086e-06, "loss": 0.0347, "step": 8602 }, { "epoch": 2.02, "learning_rate": 5.099089925186247e-06, "loss": 0.0573, "step": 8603 }, { "epoch": 2.02, "learning_rate": 5.096880953801904e-06, "loss": 0.0174, "step": 8604 }, { "epoch": 2.02, "learning_rate": 5.094672297343947e-06, "loss": 0.0058, "step": 8605 }, { "epoch": 2.02, "learning_rate": 5.092463955954234e-06, "loss": 0.0033, "step": 8606 }, { "epoch": 2.02, "learning_rate": 5.090255929774615e-06, "loss": 0.0776, "step": 8607 }, { "epoch": 2.02, "learning_rate": 5.0880482189468935e-06, "loss": 0.0224, "step": 8608 }, { "epoch": 2.02, "learning_rate": 5.085840823612882e-06, "loss": 0.0138, "step": 8609 }, { "epoch": 2.02, "learning_rate": 5.083633743914363e-06, "loss": 0.0007, "step": 8610 }, { "epoch": 2.02, "learning_rate": 5.081426979993092e-06, "loss": 0.0025, "step": 8611 }, { "epoch": 2.02, "learning_rate": 5.079220531990811e-06, "loss": 0.0008, "step": 8612 }, { "epoch": 2.02, "learning_rate": 5.077014400049236e-06, "loss": 0.0021, "step": 8613 }, { "epoch": 2.02, "learning_rate": 5.0748085843100716e-06, "loss": 0.0016, "step": 8614 }, { "epoch": 2.02, "learning_rate": 5.072603084914996e-06, "loss": 0.0547, "step": 8615 }, { "epoch": 2.02, "learning_rate": 5.070397902005665e-06, "loss": 0.0188, "step": 8616 }, { "epoch": 2.02, "learning_rate": 5.068193035723716e-06, "loss": 0.0618, "step": 8617 }, { "epoch": 2.02, "learning_rate": 5.065988486210773e-06, "loss": 0.0093, "step": 8618 }, { "epoch": 2.02, "learning_rate": 5.0637842536084315e-06, "loss": 0.0102, "step": 8619 }, { "epoch": 2.02, "learning_rate": 5.061580338058263e-06, "loss": 0.0244, "step": 8620 }, { "epoch": 2.02, "learning_rate": 5.059376739701834e-06, "loss": 0.0445, "step": 8621 }, { "epoch": 2.02, "learning_rate": 5.057173458680678e-06, "loss": 0.002, "step": 8622 }, { "epoch": 2.02, "learning_rate": 5.054970495136309e-06, "loss": 0.0093, "step": 8623 }, { "epoch": 2.02, "learning_rate": 5.052767849210221e-06, "loss": 0.0069, "step": 8624 }, { "epoch": 2.02, "learning_rate": 5.050565521043895e-06, "loss": 0.054, "step": 8625 }, { "epoch": 2.02, "learning_rate": 5.04836351077878e-06, "loss": 0.0001, "step": 8626 }, { "epoch": 2.02, "learning_rate": 5.046161818556319e-06, "loss": 0.0097, "step": 8627 }, { "epoch": 2.02, "learning_rate": 5.043960444517921e-06, "loss": 0.0022, "step": 8628 }, { "epoch": 2.03, "learning_rate": 5.041759388804978e-06, "loss": 0.0031, "step": 8629 }, { "epoch": 2.03, "learning_rate": 5.039558651558869e-06, "loss": 0.0369, "step": 8630 }, { "epoch": 2.03, "learning_rate": 5.037358232920944e-06, "loss": 0.0028, "step": 8631 }, { "epoch": 2.03, "learning_rate": 5.035158133032534e-06, "loss": 0.0225, "step": 8632 }, { "epoch": 2.03, "learning_rate": 5.032958352034949e-06, "loss": 0.0219, "step": 8633 }, { "epoch": 2.03, "learning_rate": 5.030758890069487e-06, "loss": 0.0085, "step": 8634 }, { "epoch": 2.03, "learning_rate": 5.028559747277417e-06, "loss": 0.0259, "step": 8635 }, { "epoch": 2.03, "learning_rate": 5.026360923799984e-06, "loss": 0.0058, "step": 8636 }, { "epoch": 2.03, "learning_rate": 5.024162419778425e-06, "loss": 0.0134, "step": 8637 }, { "epoch": 2.03, "learning_rate": 5.021964235353949e-06, "loss": 0.0019, "step": 8638 }, { "epoch": 2.03, "learning_rate": 5.01976637066774e-06, "loss": 0.0582, "step": 8639 }, { "epoch": 2.03, "learning_rate": 5.017568825860967e-06, "loss": 0.0302, "step": 8640 }, { "epoch": 2.03, "learning_rate": 5.015371601074783e-06, "loss": 0.0002, "step": 8641 }, { "epoch": 2.03, "learning_rate": 5.013174696450309e-06, "loss": 0.0005, "step": 8642 }, { "epoch": 2.03, "learning_rate": 5.010978112128659e-06, "loss": 0.0063, "step": 8643 }, { "epoch": 2.03, "learning_rate": 5.008781848250916e-06, "loss": 0.0134, "step": 8644 }, { "epoch": 2.03, "learning_rate": 5.00658590495814e-06, "loss": 0.0107, "step": 8645 }, { "epoch": 2.03, "learning_rate": 5.004390282391385e-06, "loss": 0.0022, "step": 8646 }, { "epoch": 2.03, "learning_rate": 5.002194980691671e-06, "loss": 0.0136, "step": 8647 }, { "epoch": 2.03, "learning_rate": 5.000000000000003e-06, "loss": 0.0239, "step": 8648 }, { "epoch": 2.03, "learning_rate": 4.997805340457358e-06, "loss": 0.0163, "step": 8649 }, { "epoch": 2.03, "learning_rate": 4.995611002204709e-06, "loss": 0.005, "step": 8650 }, { "epoch": 2.03, "learning_rate": 4.9934169853829915e-06, "loss": 0.0213, "step": 8651 }, { "epoch": 2.03, "learning_rate": 4.991223290133125e-06, "loss": 0.0097, "step": 8652 }, { "epoch": 2.03, "learning_rate": 4.989029916596017e-06, "loss": 0.0372, "step": 8653 }, { "epoch": 2.03, "learning_rate": 4.9868368649125435e-06, "loss": 0.0009, "step": 8654 }, { "epoch": 2.03, "learning_rate": 4.984644135223563e-06, "loss": 0.0401, "step": 8655 }, { "epoch": 2.03, "learning_rate": 4.982451727669912e-06, "loss": 0.0071, "step": 8656 }, { "epoch": 2.03, "learning_rate": 4.9802596423924155e-06, "loss": 0.0059, "step": 8657 }, { "epoch": 2.03, "learning_rate": 4.978067879531866e-06, "loss": 0.089, "step": 8658 }, { "epoch": 2.03, "learning_rate": 4.97587643922904e-06, "loss": 0.028, "step": 8659 }, { "epoch": 2.03, "learning_rate": 4.973685321624692e-06, "loss": 0.0005, "step": 8660 }, { "epoch": 2.03, "learning_rate": 4.971494526859557e-06, "loss": 0.0719, "step": 8661 }, { "epoch": 2.03, "learning_rate": 4.969304055074355e-06, "loss": 0.0029, "step": 8662 }, { "epoch": 2.03, "learning_rate": 4.967113906409777e-06, "loss": 0.0046, "step": 8663 }, { "epoch": 2.03, "learning_rate": 4.964924081006494e-06, "loss": 0.0037, "step": 8664 }, { "epoch": 2.03, "learning_rate": 4.962734579005156e-06, "loss": 0.0032, "step": 8665 }, { "epoch": 2.03, "learning_rate": 4.9605454005464e-06, "loss": 0.0045, "step": 8666 }, { "epoch": 2.03, "learning_rate": 4.958356545770834e-06, "loss": 0.0122, "step": 8667 }, { "epoch": 2.03, "learning_rate": 4.956168014819047e-06, "loss": 0.0136, "step": 8668 }, { "epoch": 2.03, "learning_rate": 4.953979807831604e-06, "loss": 0.0017, "step": 8669 }, { "epoch": 2.03, "learning_rate": 4.951791924949062e-06, "loss": 0.0078, "step": 8670 }, { "epoch": 2.04, "learning_rate": 4.949604366311942e-06, "loss": 0.0003, "step": 8671 }, { "epoch": 2.04, "learning_rate": 4.94741713206075e-06, "loss": 0.0016, "step": 8672 }, { "epoch": 2.04, "learning_rate": 4.945230222335976e-06, "loss": 0.0194, "step": 8673 }, { "epoch": 2.04, "learning_rate": 4.943043637278083e-06, "loss": 0.0344, "step": 8674 }, { "epoch": 2.04, "learning_rate": 4.9408573770275145e-06, "loss": 0.0234, "step": 8675 }, { "epoch": 2.04, "learning_rate": 4.938671441724689e-06, "loss": 0.0164, "step": 8676 }, { "epoch": 2.04, "learning_rate": 4.936485831510014e-06, "loss": 0.0079, "step": 8677 }, { "epoch": 2.04, "learning_rate": 4.934300546523877e-06, "loss": 0.0034, "step": 8678 }, { "epoch": 2.04, "learning_rate": 4.932115586906624e-06, "loss": 0.0072, "step": 8679 }, { "epoch": 2.04, "learning_rate": 4.929930952798606e-06, "loss": 0.0419, "step": 8680 }, { "epoch": 2.04, "learning_rate": 4.927746644340133e-06, "loss": 0.0331, "step": 8681 }, { "epoch": 2.04, "learning_rate": 4.925562661671512e-06, "loss": 0.0024, "step": 8682 }, { "epoch": 2.04, "learning_rate": 4.923379004933015e-06, "loss": 0.0105, "step": 8683 }, { "epoch": 2.04, "learning_rate": 4.921195674264897e-06, "loss": 0.0185, "step": 8684 }, { "epoch": 2.04, "learning_rate": 4.919012669807392e-06, "loss": 0.0085, "step": 8685 }, { "epoch": 2.04, "learning_rate": 4.916829991700717e-06, "loss": 0.0156, "step": 8686 }, { "epoch": 2.04, "learning_rate": 4.914647640085065e-06, "loss": 0.0139, "step": 8687 }, { "epoch": 2.04, "learning_rate": 4.912465615100602e-06, "loss": 0.0339, "step": 8688 }, { "epoch": 2.04, "learning_rate": 4.91028391688749e-06, "loss": 0.0035, "step": 8689 }, { "epoch": 2.04, "learning_rate": 4.90810254558585e-06, "loss": 0.0002, "step": 8690 }, { "epoch": 2.04, "learning_rate": 4.905921501335795e-06, "loss": 0.0215, "step": 8691 }, { "epoch": 2.04, "learning_rate": 4.903740784277409e-06, "loss": 0.0016, "step": 8692 }, { "epoch": 2.04, "learning_rate": 4.901560394550764e-06, "loss": 0.0114, "step": 8693 }, { "epoch": 2.04, "learning_rate": 4.899380332295905e-06, "loss": 0.0309, "step": 8694 }, { "epoch": 2.04, "learning_rate": 4.897200597652852e-06, "loss": 0.0009, "step": 8695 }, { "epoch": 2.04, "learning_rate": 4.895021190761617e-06, "loss": 0.0327, "step": 8696 }, { "epoch": 2.04, "learning_rate": 4.892842111762175e-06, "loss": 0.0294, "step": 8697 }, { "epoch": 2.04, "learning_rate": 4.890663360794495e-06, "loss": 0.0085, "step": 8698 }, { "epoch": 2.04, "learning_rate": 4.888484937998515e-06, "loss": 0.057, "step": 8699 }, { "epoch": 2.04, "learning_rate": 4.886306843514154e-06, "loss": 0.0694, "step": 8700 }, { "epoch": 2.04, "learning_rate": 4.884129077481307e-06, "loss": 0.0047, "step": 8701 }, { "epoch": 2.04, "learning_rate": 4.881951640039858e-06, "loss": 0.0879, "step": 8702 }, { "epoch": 2.04, "learning_rate": 4.8797745313296605e-06, "loss": 0.0005, "step": 8703 }, { "epoch": 2.04, "learning_rate": 4.877597751490547e-06, "loss": 0.0058, "step": 8704 }, { "epoch": 2.04, "learning_rate": 4.875421300662337e-06, "loss": 0.0339, "step": 8705 }, { "epoch": 2.04, "learning_rate": 4.873245178984821e-06, "loss": 0.0034, "step": 8706 }, { "epoch": 2.04, "learning_rate": 4.871069386597772e-06, "loss": 0.0004, "step": 8707 }, { "epoch": 2.04, "learning_rate": 4.868893923640935e-06, "loss": 0.0301, "step": 8708 }, { "epoch": 2.04, "learning_rate": 4.866718790254048e-06, "loss": 0.0014, "step": 8709 }, { "epoch": 2.04, "learning_rate": 4.864543986576815e-06, "loss": 0.0094, "step": 8710 }, { "epoch": 2.04, "learning_rate": 4.86236951274892e-06, "loss": 0.027, "step": 8711 }, { "epoch": 2.04, "learning_rate": 4.860195368910037e-06, "loss": 0.0004, "step": 8712 }, { "epoch": 2.04, "learning_rate": 4.858021555199803e-06, "loss": 0.0109, "step": 8713 }, { "epoch": 2.05, "learning_rate": 4.855848071757852e-06, "loss": 0.038, "step": 8714 }, { "epoch": 2.05, "learning_rate": 4.853674918723772e-06, "loss": 0.0203, "step": 8715 }, { "epoch": 2.05, "learning_rate": 4.8515020962371566e-06, "loss": 0.023, "step": 8716 }, { "epoch": 2.05, "learning_rate": 4.849329604437556e-06, "loss": 0.0257, "step": 8717 }, { "epoch": 2.05, "learning_rate": 4.847157443464518e-06, "loss": 0.0002, "step": 8718 }, { "epoch": 2.05, "learning_rate": 4.844985613457556e-06, "loss": 0.0392, "step": 8719 }, { "epoch": 2.05, "learning_rate": 4.8428141145561625e-06, "loss": 0.0006, "step": 8720 }, { "epoch": 2.05, "learning_rate": 4.84064294689982e-06, "loss": 0.0542, "step": 8721 }, { "epoch": 2.05, "learning_rate": 4.838472110627978e-06, "loss": 0.0504, "step": 8722 }, { "epoch": 2.05, "learning_rate": 4.836301605880069e-06, "loss": 0.0014, "step": 8723 }, { "epoch": 2.05, "learning_rate": 4.834131432795502e-06, "loss": 0.0388, "step": 8724 }, { "epoch": 2.05, "learning_rate": 4.831961591513671e-06, "loss": 0.0042, "step": 8725 }, { "epoch": 2.05, "learning_rate": 4.829792082173943e-06, "loss": 0.0196, "step": 8726 }, { "epoch": 2.05, "learning_rate": 4.827622904915666e-06, "loss": 0.0128, "step": 8727 }, { "epoch": 2.05, "learning_rate": 4.82545405987816e-06, "loss": 0.0009, "step": 8728 }, { "epoch": 2.05, "learning_rate": 4.8232855472007335e-06, "loss": 0.0206, "step": 8729 }, { "epoch": 2.05, "learning_rate": 4.821117367022679e-06, "loss": 0.0011, "step": 8730 }, { "epoch": 2.05, "learning_rate": 4.81894951948324e-06, "loss": 0.018, "step": 8731 }, { "epoch": 2.05, "learning_rate": 4.816782004721672e-06, "loss": 0.0057, "step": 8732 }, { "epoch": 2.05, "learning_rate": 4.814614822877185e-06, "loss": 0.0014, "step": 8733 }, { "epoch": 2.05, "learning_rate": 4.812447974088983e-06, "loss": 0.0359, "step": 8734 }, { "epoch": 2.05, "learning_rate": 4.81028145849624e-06, "loss": 0.0761, "step": 8735 }, { "epoch": 2.05, "learning_rate": 4.808115276238109e-06, "loss": 0.0522, "step": 8736 }, { "epoch": 2.05, "learning_rate": 4.805949427453722e-06, "loss": 0.0073, "step": 8737 }, { "epoch": 2.05, "learning_rate": 4.803783912282198e-06, "loss": 0.0045, "step": 8738 }, { "epoch": 2.05, "learning_rate": 4.8016187308626225e-06, "loss": 0.002, "step": 8739 }, { "epoch": 2.05, "learning_rate": 4.799453883334063e-06, "loss": 0.0246, "step": 8740 }, { "epoch": 2.05, "learning_rate": 4.797289369835573e-06, "loss": 0.0122, "step": 8741 }, { "epoch": 2.05, "learning_rate": 4.795125190506176e-06, "loss": 0.0238, "step": 8742 }, { "epoch": 2.05, "learning_rate": 4.792961345484876e-06, "loss": 0.0029, "step": 8743 }, { "epoch": 2.05, "learning_rate": 4.790797834910652e-06, "loss": 0.0026, "step": 8744 }, { "epoch": 2.05, "learning_rate": 4.788634658922476e-06, "loss": 0.0008, "step": 8745 }, { "epoch": 2.05, "learning_rate": 4.7864718176592826e-06, "loss": 0.0014, "step": 8746 }, { "epoch": 2.05, "learning_rate": 4.784309311259988e-06, "loss": 0.0069, "step": 8747 }, { "epoch": 2.05, "learning_rate": 4.782147139863495e-06, "loss": 0.0492, "step": 8748 }, { "epoch": 2.05, "learning_rate": 4.779985303608675e-06, "loss": 0.0969, "step": 8749 }, { "epoch": 2.05, "learning_rate": 4.777823802634391e-06, "loss": 0.0214, "step": 8750 }, { "epoch": 2.05, "learning_rate": 4.7756626370794625e-06, "loss": 0.0292, "step": 8751 }, { "epoch": 2.05, "learning_rate": 4.77350180708271e-06, "loss": 0.0295, "step": 8752 }, { "epoch": 2.05, "learning_rate": 4.771341312782918e-06, "loss": 0.0005, "step": 8753 }, { "epoch": 2.05, "learning_rate": 4.7691811543188605e-06, "loss": 0.0008, "step": 8754 }, { "epoch": 2.05, "learning_rate": 4.7670213318292815e-06, "loss": 0.0031, "step": 8755 }, { "epoch": 2.05, "learning_rate": 4.7648618454529e-06, "loss": 0.0005, "step": 8756 }, { "epoch": 2.06, "learning_rate": 4.7627026953284305e-06, "loss": 0.0179, "step": 8757 }, { "epoch": 2.06, "learning_rate": 4.760543881594548e-06, "loss": 0.024, "step": 8758 }, { "epoch": 2.06, "learning_rate": 4.758385404389914e-06, "loss": 0.0194, "step": 8759 }, { "epoch": 2.06, "learning_rate": 4.756227263853163e-06, "loss": 0.0215, "step": 8760 }, { "epoch": 2.06, "learning_rate": 4.7540694601229186e-06, "loss": 0.0053, "step": 8761 }, { "epoch": 2.06, "learning_rate": 4.751911993337775e-06, "loss": 0.0071, "step": 8762 }, { "epoch": 2.06, "learning_rate": 4.749754863636299e-06, "loss": 0.0328, "step": 8763 }, { "epoch": 2.06, "learning_rate": 4.747598071157051e-06, "loss": 0.0011, "step": 8764 }, { "epoch": 2.06, "learning_rate": 4.745441616038557e-06, "loss": 0.0071, "step": 8765 }, { "epoch": 2.06, "learning_rate": 4.743285498419332e-06, "loss": 0.0012, "step": 8766 }, { "epoch": 2.06, "learning_rate": 4.74112971843785e-06, "loss": 0.039, "step": 8767 }, { "epoch": 2.06, "learning_rate": 4.7389742762325895e-06, "loss": 0.023, "step": 8768 }, { "epoch": 2.06, "learning_rate": 4.736819171941984e-06, "loss": 0.0107, "step": 8769 }, { "epoch": 2.06, "learning_rate": 4.734664405704465e-06, "loss": 0.0562, "step": 8770 }, { "epoch": 2.06, "learning_rate": 4.732509977658426e-06, "loss": 0.0346, "step": 8771 }, { "epoch": 2.06, "learning_rate": 4.730355887942245e-06, "loss": 0.0089, "step": 8772 }, { "epoch": 2.06, "learning_rate": 4.728202136694286e-06, "loss": 0.0123, "step": 8773 }, { "epoch": 2.06, "learning_rate": 4.726048724052877e-06, "loss": 0.0198, "step": 8774 }, { "epoch": 2.06, "learning_rate": 4.723895650156335e-06, "loss": 0.0659, "step": 8775 }, { "epoch": 2.06, "learning_rate": 4.721742915142945e-06, "loss": 0.0159, "step": 8776 }, { "epoch": 2.06, "learning_rate": 4.719590519150986e-06, "loss": 0.0008, "step": 8777 }, { "epoch": 2.06, "learning_rate": 4.717438462318702e-06, "loss": 0.0151, "step": 8778 }, { "epoch": 2.06, "learning_rate": 4.715286744784318e-06, "loss": 0.0575, "step": 8779 }, { "epoch": 2.06, "learning_rate": 4.713135366686038e-06, "loss": 0.0017, "step": 8780 }, { "epoch": 2.06, "learning_rate": 4.7109843281620435e-06, "loss": 0.0054, "step": 8781 }, { "epoch": 2.06, "learning_rate": 4.708833629350507e-06, "loss": 0.0424, "step": 8782 }, { "epoch": 2.06, "learning_rate": 4.7066832703895495e-06, "loss": 0.031, "step": 8783 }, { "epoch": 2.06, "learning_rate": 4.7045332514173e-06, "loss": 0.0005, "step": 8784 }, { "epoch": 2.06, "learning_rate": 4.702383572571848e-06, "loss": 0.0784, "step": 8785 }, { "epoch": 2.06, "learning_rate": 4.700234233991276e-06, "loss": 0.038, "step": 8786 }, { "epoch": 2.06, "learning_rate": 4.698085235813623e-06, "loss": 0.011, "step": 8787 }, { "epoch": 2.06, "learning_rate": 4.695936578176929e-06, "loss": 0.0145, "step": 8788 }, { "epoch": 2.06, "learning_rate": 4.693788261219192e-06, "loss": 0.0517, "step": 8789 }, { "epoch": 2.06, "learning_rate": 4.691640285078409e-06, "loss": 0.0524, "step": 8790 }, { "epoch": 2.06, "learning_rate": 4.689492649892538e-06, "loss": 0.0814, "step": 8791 }, { "epoch": 2.06, "learning_rate": 4.687345355799519e-06, "loss": 0.0548, "step": 8792 }, { "epoch": 2.06, "learning_rate": 4.685198402937278e-06, "loss": 0.0017, "step": 8793 }, { "epoch": 2.06, "learning_rate": 4.683051791443711e-06, "loss": 0.0056, "step": 8794 }, { "epoch": 2.06, "learning_rate": 4.680905521456694e-06, "loss": 0.0096, "step": 8795 }, { "epoch": 2.06, "learning_rate": 4.6787595931140795e-06, "loss": 0.0133, "step": 8796 }, { "epoch": 2.06, "learning_rate": 4.6766140065537e-06, "loss": 0.0015, "step": 8797 }, { "epoch": 2.06, "learning_rate": 4.674468761913378e-06, "loss": 0.0042, "step": 8798 }, { "epoch": 2.07, "learning_rate": 4.672323859330883e-06, "loss": 0.0045, "step": 8799 }, { "epoch": 2.07, "learning_rate": 4.670179298943995e-06, "loss": 0.0302, "step": 8800 }, { "epoch": 2.07, "learning_rate": 4.668035080890452e-06, "loss": 0.035, "step": 8801 }, { "epoch": 2.07, "learning_rate": 4.665891205307986e-06, "loss": 0.0619, "step": 8802 }, { "epoch": 2.07, "learning_rate": 4.663747672334284e-06, "loss": 0.001, "step": 8803 }, { "epoch": 2.07, "learning_rate": 4.661604482107036e-06, "loss": 0.0008, "step": 8804 }, { "epoch": 2.07, "learning_rate": 4.659461634763891e-06, "loss": 0.0048, "step": 8805 }, { "epoch": 2.07, "learning_rate": 4.657319130442492e-06, "loss": 0.0163, "step": 8806 }, { "epoch": 2.07, "learning_rate": 4.655176969280445e-06, "loss": 0.0021, "step": 8807 }, { "epoch": 2.07, "learning_rate": 4.6530351514153395e-06, "loss": 0.0351, "step": 8808 }, { "epoch": 2.07, "learning_rate": 4.650893676984751e-06, "loss": 0.0106, "step": 8809 }, { "epoch": 2.07, "learning_rate": 4.648752546126221e-06, "loss": 0.0035, "step": 8810 }, { "epoch": 2.07, "learning_rate": 4.646611758977276e-06, "loss": 0.0273, "step": 8811 }, { "epoch": 2.07, "learning_rate": 4.644471315675414e-06, "loss": 0.0454, "step": 8812 }, { "epoch": 2.07, "learning_rate": 4.6423312163581215e-06, "loss": 0.0014, "step": 8813 }, { "epoch": 2.07, "learning_rate": 4.640191461162854e-06, "loss": 0.0209, "step": 8814 }, { "epoch": 2.07, "learning_rate": 4.638052050227043e-06, "loss": 0.0065, "step": 8815 }, { "epoch": 2.07, "learning_rate": 4.635912983688111e-06, "loss": 0.0247, "step": 8816 }, { "epoch": 2.07, "learning_rate": 4.633774261683442e-06, "loss": 0.018, "step": 8817 }, { "epoch": 2.07, "learning_rate": 4.6316358843504175e-06, "loss": 0.0076, "step": 8818 }, { "epoch": 2.07, "learning_rate": 4.629497851826368e-06, "loss": 0.0599, "step": 8819 }, { "epoch": 2.07, "learning_rate": 4.627360164248632e-06, "loss": 0.0053, "step": 8820 }, { "epoch": 2.07, "learning_rate": 4.625222821754505e-06, "loss": 0.0008, "step": 8821 }, { "epoch": 2.07, "learning_rate": 4.623085824481278e-06, "loss": 0.0149, "step": 8822 }, { "epoch": 2.07, "learning_rate": 4.620949172566197e-06, "loss": 0.0252, "step": 8823 }, { "epoch": 2.07, "learning_rate": 4.618812866146505e-06, "loss": 0.001, "step": 8824 }, { "epoch": 2.07, "learning_rate": 4.61667690535942e-06, "loss": 0.0241, "step": 8825 }, { "epoch": 2.07, "learning_rate": 4.614541290342132e-06, "loss": 0.0131, "step": 8826 }, { "epoch": 2.07, "learning_rate": 4.61240602123181e-06, "loss": 0.047, "step": 8827 }, { "epoch": 2.07, "learning_rate": 4.610271098165599e-06, "loss": 0.0065, "step": 8828 }, { "epoch": 2.07, "learning_rate": 4.60813652128063e-06, "loss": 0.0001, "step": 8829 }, { "epoch": 2.07, "learning_rate": 4.606002290714006e-06, "loss": 0.0222, "step": 8830 }, { "epoch": 2.07, "learning_rate": 4.603868406602806e-06, "loss": 0.0021, "step": 8831 }, { "epoch": 2.07, "learning_rate": 4.6017348690840855e-06, "loss": 0.0088, "step": 8832 }, { "epoch": 2.07, "learning_rate": 4.599601678294886e-06, "loss": 0.0005, "step": 8833 }, { "epoch": 2.07, "learning_rate": 4.597468834372228e-06, "loss": 0.0108, "step": 8834 }, { "epoch": 2.07, "learning_rate": 4.595336337453089e-06, "loss": 0.0312, "step": 8835 }, { "epoch": 2.07, "learning_rate": 4.59320418767445e-06, "loss": 0.0173, "step": 8836 }, { "epoch": 2.07, "learning_rate": 4.591072385173253e-06, "loss": 0.0498, "step": 8837 }, { "epoch": 2.07, "learning_rate": 4.588940930086433e-06, "loss": 0.0113, "step": 8838 }, { "epoch": 2.07, "learning_rate": 4.5868098225508765e-06, "loss": 0.0069, "step": 8839 }, { "epoch": 2.07, "learning_rate": 4.584679062703474e-06, "loss": 0.0044, "step": 8840 }, { "epoch": 2.07, "learning_rate": 4.582548650681087e-06, "loss": 0.026, "step": 8841 }, { "epoch": 2.08, "learning_rate": 4.580418586620546e-06, "loss": 0.0696, "step": 8842 }, { "epoch": 2.08, "learning_rate": 4.578288870658667e-06, "loss": 0.0252, "step": 8843 }, { "epoch": 2.08, "learning_rate": 4.576159502932236e-06, "loss": 0.0137, "step": 8844 }, { "epoch": 2.08, "learning_rate": 4.574030483578029e-06, "loss": 0.0132, "step": 8845 }, { "epoch": 2.08, "learning_rate": 4.571901812732791e-06, "loss": 0.0058, "step": 8846 }, { "epoch": 2.08, "learning_rate": 4.569773490533244e-06, "loss": 0.0345, "step": 8847 }, { "epoch": 2.08, "learning_rate": 4.567645517116086e-06, "loss": 0.0081, "step": 8848 }, { "epoch": 2.08, "learning_rate": 4.565517892618002e-06, "loss": 0.0545, "step": 8849 }, { "epoch": 2.08, "learning_rate": 4.563390617175654e-06, "loss": 0.0194, "step": 8850 }, { "epoch": 2.08, "learning_rate": 4.561263690925662e-06, "loss": 0.0254, "step": 8851 }, { "epoch": 2.08, "learning_rate": 4.559137114004652e-06, "loss": 0.001, "step": 8852 }, { "epoch": 2.08, "learning_rate": 4.557010886549202e-06, "loss": 0.0575, "step": 8853 }, { "epoch": 2.08, "learning_rate": 4.554885008695894e-06, "loss": 0.0021, "step": 8854 }, { "epoch": 2.08, "learning_rate": 4.552759480581255e-06, "loss": 0.0363, "step": 8855 }, { "epoch": 2.08, "learning_rate": 4.5506343023418195e-06, "loss": 0.01, "step": 8856 }, { "epoch": 2.08, "learning_rate": 4.548509474114081e-06, "loss": 0.0041, "step": 8857 }, { "epoch": 2.08, "learning_rate": 4.546384996034523e-06, "loss": 0.0149, "step": 8858 }, { "epoch": 2.08, "learning_rate": 4.544260868239596e-06, "loss": 0.0058, "step": 8859 }, { "epoch": 2.08, "learning_rate": 4.54213709086573e-06, "loss": 0.0135, "step": 8860 }, { "epoch": 2.08, "learning_rate": 4.540013664049342e-06, "loss": 0.0036, "step": 8861 }, { "epoch": 2.08, "learning_rate": 4.537890587926814e-06, "loss": 0.0111, "step": 8862 }, { "epoch": 2.08, "learning_rate": 4.535767862634514e-06, "loss": 0.0035, "step": 8863 }, { "epoch": 2.08, "learning_rate": 4.5336454883087765e-06, "loss": 0.0004, "step": 8864 }, { "epoch": 2.08, "learning_rate": 4.531523465085932e-06, "loss": 0.002, "step": 8865 }, { "epoch": 2.08, "learning_rate": 4.529401793102272e-06, "loss": 0.0077, "step": 8866 }, { "epoch": 2.08, "learning_rate": 4.527280472494068e-06, "loss": 0.0077, "step": 8867 }, { "epoch": 2.08, "learning_rate": 4.525159503397579e-06, "loss": 0.0024, "step": 8868 }, { "epoch": 2.08, "learning_rate": 4.5230388859490285e-06, "loss": 0.0048, "step": 8869 }, { "epoch": 2.08, "learning_rate": 4.520918620284632e-06, "loss": 0.0165, "step": 8870 }, { "epoch": 2.08, "learning_rate": 4.51879870654056e-06, "loss": 0.0043, "step": 8871 }, { "epoch": 2.08, "learning_rate": 4.516679144852987e-06, "loss": 0.0081, "step": 8872 }, { "epoch": 2.08, "learning_rate": 4.5145599353580425e-06, "loss": 0.0416, "step": 8873 }, { "epoch": 2.08, "learning_rate": 4.512441078191855e-06, "loss": 0.0007, "step": 8874 }, { "epoch": 2.08, "learning_rate": 4.510322573490502e-06, "loss": 0.0075, "step": 8875 }, { "epoch": 2.08, "learning_rate": 4.508204421390065e-06, "loss": 0.0305, "step": 8876 }, { "epoch": 2.08, "learning_rate": 4.506086622026593e-06, "loss": 0.0042, "step": 8877 }, { "epoch": 2.08, "learning_rate": 4.50396917553611e-06, "loss": 0.0729, "step": 8878 }, { "epoch": 2.08, "learning_rate": 4.501852082054619e-06, "loss": 0.0109, "step": 8879 }, { "epoch": 2.08, "learning_rate": 4.4997353417180955e-06, "loss": 0.0061, "step": 8880 }, { "epoch": 2.08, "learning_rate": 4.497618954662507e-06, "loss": 0.0324, "step": 8881 }, { "epoch": 2.08, "learning_rate": 4.495502921023783e-06, "loss": 0.0206, "step": 8882 }, { "epoch": 2.08, "learning_rate": 4.4933872409378335e-06, "loss": 0.0083, "step": 8883 }, { "epoch": 2.09, "learning_rate": 4.4912719145405545e-06, "loss": 0.0003, "step": 8884 }, { "epoch": 2.09, "learning_rate": 4.489156941967806e-06, "loss": 0.0008, "step": 8885 }, { "epoch": 2.09, "learning_rate": 4.487042323355444e-06, "loss": 0.0011, "step": 8886 }, { "epoch": 2.09, "learning_rate": 4.484928058839274e-06, "loss": 0.0045, "step": 8887 }, { "epoch": 2.09, "learning_rate": 4.482814148555107e-06, "loss": 0.0088, "step": 8888 }, { "epoch": 2.09, "learning_rate": 4.480700592638712e-06, "loss": 0.0623, "step": 8889 }, { "epoch": 2.09, "learning_rate": 4.4785873912258515e-06, "loss": 0.0008, "step": 8890 }, { "epoch": 2.09, "learning_rate": 4.476474544452243e-06, "loss": 0.0213, "step": 8891 }, { "epoch": 2.09, "learning_rate": 4.474362052453601e-06, "loss": 0.0022, "step": 8892 }, { "epoch": 2.09, "learning_rate": 4.472249915365614e-06, "loss": 0.0209, "step": 8893 }, { "epoch": 2.09, "learning_rate": 4.470138133323941e-06, "loss": 0.0014, "step": 8894 }, { "epoch": 2.09, "learning_rate": 4.468026706464221e-06, "loss": 0.0126, "step": 8895 }, { "epoch": 2.09, "learning_rate": 4.465915634922067e-06, "loss": 0.0184, "step": 8896 }, { "epoch": 2.09, "learning_rate": 4.463804918833079e-06, "loss": 0.0001, "step": 8897 }, { "epoch": 2.09, "learning_rate": 4.461694558332826e-06, "loss": 0.0567, "step": 8898 }, { "epoch": 2.09, "learning_rate": 4.4595845535568545e-06, "loss": 0.0097, "step": 8899 }, { "epoch": 2.09, "learning_rate": 4.457474904640687e-06, "loss": 0.0027, "step": 8900 }, { "epoch": 2.09, "learning_rate": 4.4553656117198345e-06, "loss": 0.0298, "step": 8901 }, { "epoch": 2.09, "learning_rate": 4.453256674929769e-06, "loss": 0.0024, "step": 8902 }, { "epoch": 2.09, "learning_rate": 4.451148094405946e-06, "loss": 0.0254, "step": 8903 }, { "epoch": 2.09, "learning_rate": 4.449039870283807e-06, "loss": 0.0148, "step": 8904 }, { "epoch": 2.09, "learning_rate": 4.446932002698753e-06, "loss": 0.0059, "step": 8905 }, { "epoch": 2.09, "learning_rate": 4.444824491786186e-06, "loss": 0.0244, "step": 8906 }, { "epoch": 2.09, "learning_rate": 4.4427173376814545e-06, "loss": 0.0288, "step": 8907 }, { "epoch": 2.09, "learning_rate": 4.440610540519912e-06, "loss": 0.0131, "step": 8908 }, { "epoch": 2.09, "learning_rate": 4.438504100436871e-06, "loss": 0.0341, "step": 8909 }, { "epoch": 2.09, "learning_rate": 4.4363980175676335e-06, "loss": 0.0175, "step": 8910 }, { "epoch": 2.09, "learning_rate": 4.434292292047471e-06, "loss": 0.0459, "step": 8911 }, { "epoch": 2.09, "learning_rate": 4.432186924011628e-06, "loss": 0.0527, "step": 8912 }, { "epoch": 2.09, "learning_rate": 4.4300819135953414e-06, "loss": 0.0038, "step": 8913 }, { "epoch": 2.09, "learning_rate": 4.427977260933811e-06, "loss": 0.0029, "step": 8914 }, { "epoch": 2.09, "learning_rate": 4.425872966162218e-06, "loss": 0.0069, "step": 8915 }, { "epoch": 2.09, "learning_rate": 4.423769029415718e-06, "loss": 0.0229, "step": 8916 }, { "epoch": 2.09, "learning_rate": 4.421665450829452e-06, "loss": 0.0146, "step": 8917 }, { "epoch": 2.09, "learning_rate": 4.4195622305385296e-06, "loss": 0.0096, "step": 8918 }, { "epoch": 2.09, "learning_rate": 4.4174593686780385e-06, "loss": 0.0002, "step": 8919 }, { "epoch": 2.09, "learning_rate": 4.41535686538305e-06, "loss": 0.0106, "step": 8920 }, { "epoch": 2.09, "learning_rate": 4.413254720788605e-06, "loss": 0.0001, "step": 8921 }, { "epoch": 2.09, "learning_rate": 4.4111529350297235e-06, "loss": 0.0069, "step": 8922 }, { "epoch": 2.09, "learning_rate": 4.4090515082414e-06, "loss": 0.006, "step": 8923 }, { "epoch": 2.09, "learning_rate": 4.406950440558614e-06, "loss": 0.0308, "step": 8924 }, { "epoch": 2.09, "learning_rate": 4.4048497321163105e-06, "loss": 0.0026, "step": 8925 }, { "epoch": 2.09, "learning_rate": 4.402749383049425e-06, "loss": 0.0004, "step": 8926 }, { "epoch": 2.1, "learning_rate": 4.4006493934928594e-06, "loss": 0.04, "step": 8927 }, { "epoch": 2.1, "learning_rate": 4.398549763581491e-06, "loss": 0.0043, "step": 8928 }, { "epoch": 2.1, "learning_rate": 4.396450493450186e-06, "loss": 0.0217, "step": 8929 }, { "epoch": 2.1, "learning_rate": 4.394351583233778e-06, "loss": 0.0016, "step": 8930 }, { "epoch": 2.1, "learning_rate": 4.392253033067078e-06, "loss": 0.0129, "step": 8931 }, { "epoch": 2.1, "learning_rate": 4.3901548430848715e-06, "loss": 0.0315, "step": 8932 }, { "epoch": 2.1, "learning_rate": 4.388057013421933e-06, "loss": 0.0433, "step": 8933 }, { "epoch": 2.1, "learning_rate": 4.385959544213002e-06, "loss": 0.0004, "step": 8934 }, { "epoch": 2.1, "learning_rate": 4.383862435592795e-06, "loss": 0.0028, "step": 8935 }, { "epoch": 2.1, "learning_rate": 4.381765687696016e-06, "loss": 0.0003, "step": 8936 }, { "epoch": 2.1, "learning_rate": 4.379669300657335e-06, "loss": 0.0172, "step": 8937 }, { "epoch": 2.1, "learning_rate": 4.377573274611401e-06, "loss": 0.0421, "step": 8938 }, { "epoch": 2.1, "learning_rate": 4.375477609692841e-06, "loss": 0.0156, "step": 8939 }, { "epoch": 2.1, "learning_rate": 4.3733823060362635e-06, "loss": 0.0026, "step": 8940 }, { "epoch": 2.1, "learning_rate": 4.371287363776245e-06, "loss": 0.0337, "step": 8941 }, { "epoch": 2.1, "learning_rate": 4.36919278304735e-06, "loss": 0.0073, "step": 8942 }, { "epoch": 2.1, "learning_rate": 4.367098563984102e-06, "loss": 0.0593, "step": 8943 }, { "epoch": 2.1, "learning_rate": 4.36500470672102e-06, "loss": 0.0702, "step": 8944 }, { "epoch": 2.1, "learning_rate": 4.362911211392593e-06, "loss": 0.02, "step": 8945 }, { "epoch": 2.1, "learning_rate": 4.3608180781332836e-06, "loss": 0.0024, "step": 8946 }, { "epoch": 2.1, "learning_rate": 4.358725307077532e-06, "loss": 0.0149, "step": 8947 }, { "epoch": 2.1, "learning_rate": 4.356632898359755e-06, "loss": 0.047, "step": 8948 }, { "epoch": 2.1, "learning_rate": 4.354540852114354e-06, "loss": 0.0982, "step": 8949 }, { "epoch": 2.1, "learning_rate": 4.352449168475697e-06, "loss": 0.0503, "step": 8950 }, { "epoch": 2.1, "learning_rate": 4.350357847578131e-06, "loss": 0.0003, "step": 8951 }, { "epoch": 2.1, "learning_rate": 4.34826688955598e-06, "loss": 0.0208, "step": 8952 }, { "epoch": 2.1, "learning_rate": 4.346176294543552e-06, "loss": 0.0456, "step": 8953 }, { "epoch": 2.1, "learning_rate": 4.34408606267512e-06, "loss": 0.0259, "step": 8954 }, { "epoch": 2.1, "learning_rate": 4.34199619408494e-06, "loss": 0.0569, "step": 8955 }, { "epoch": 2.1, "learning_rate": 4.339906688907246e-06, "loss": 0.001, "step": 8956 }, { "epoch": 2.1, "learning_rate": 4.3378175472762475e-06, "loss": 0.004, "step": 8957 }, { "epoch": 2.1, "learning_rate": 4.335728769326127e-06, "loss": 0.002, "step": 8958 }, { "epoch": 2.1, "learning_rate": 4.333640355191042e-06, "loss": 0.0004, "step": 8959 }, { "epoch": 2.1, "learning_rate": 4.331552305005137e-06, "loss": 0.0162, "step": 8960 }, { "epoch": 2.1, "learning_rate": 4.329464618902528e-06, "loss": 0.0063, "step": 8961 }, { "epoch": 2.1, "learning_rate": 4.327377297017305e-06, "loss": 0.0316, "step": 8962 }, { "epoch": 2.1, "learning_rate": 4.325290339483537e-06, "loss": 0.0053, "step": 8963 }, { "epoch": 2.1, "learning_rate": 4.3232037464352625e-06, "loss": 0.0468, "step": 8964 }, { "epoch": 2.1, "learning_rate": 4.321117518006513e-06, "loss": 0.0012, "step": 8965 }, { "epoch": 2.1, "learning_rate": 4.319031654331282e-06, "loss": 0.0256, "step": 8966 }, { "epoch": 2.1, "learning_rate": 4.316946155543542e-06, "loss": 0.0815, "step": 8967 }, { "epoch": 2.1, "learning_rate": 4.314861021777243e-06, "loss": 0.0057, "step": 8968 }, { "epoch": 2.1, "learning_rate": 4.31277625316632e-06, "loss": 0.0084, "step": 8969 }, { "epoch": 2.11, "learning_rate": 4.310691849844673e-06, "loss": 0.0259, "step": 8970 }, { "epoch": 2.11, "learning_rate": 4.308607811946178e-06, "loss": 0.0055, "step": 8971 }, { "epoch": 2.11, "learning_rate": 4.306524139604701e-06, "loss": 0.0016, "step": 8972 }, { "epoch": 2.11, "learning_rate": 4.304440832954072e-06, "loss": 0.0021, "step": 8973 }, { "epoch": 2.11, "learning_rate": 4.3023578921281025e-06, "loss": 0.0031, "step": 8974 }, { "epoch": 2.11, "learning_rate": 4.300275317260574e-06, "loss": 0.0157, "step": 8975 }, { "epoch": 2.11, "learning_rate": 4.298193108485258e-06, "loss": 0.0094, "step": 8976 }, { "epoch": 2.11, "learning_rate": 4.296111265935886e-06, "loss": 0.0246, "step": 8977 }, { "epoch": 2.11, "learning_rate": 4.294029789746184e-06, "loss": 0.0123, "step": 8978 }, { "epoch": 2.11, "learning_rate": 4.291948680049838e-06, "loss": 0.0341, "step": 8979 }, { "epoch": 2.11, "learning_rate": 4.289867936980516e-06, "loss": 0.0176, "step": 8980 }, { "epoch": 2.11, "learning_rate": 4.28778756067187e-06, "loss": 0.0003, "step": 8981 }, { "epoch": 2.11, "learning_rate": 4.2857075512575195e-06, "loss": 0.0009, "step": 8982 }, { "epoch": 2.11, "learning_rate": 4.2836279088710615e-06, "loss": 0.0101, "step": 8983 }, { "epoch": 2.11, "learning_rate": 4.28154863364607e-06, "loss": 0.0093, "step": 8984 }, { "epoch": 2.11, "learning_rate": 4.2794697257161e-06, "loss": 0.0015, "step": 8985 }, { "epoch": 2.11, "learning_rate": 4.277391185214678e-06, "loss": 0.0233, "step": 8986 }, { "epoch": 2.11, "learning_rate": 4.275313012275306e-06, "loss": 0.0127, "step": 8987 }, { "epoch": 2.11, "learning_rate": 4.27323520703147e-06, "loss": 0.0285, "step": 8988 }, { "epoch": 2.11, "learning_rate": 4.2711577696166225e-06, "loss": 0.0626, "step": 8989 }, { "epoch": 2.11, "learning_rate": 4.269080700164198e-06, "loss": 0.0014, "step": 8990 }, { "epoch": 2.11, "learning_rate": 4.267003998807604e-06, "loss": 0.0008, "step": 8991 }, { "epoch": 2.11, "learning_rate": 4.264927665680232e-06, "loss": 0.0034, "step": 8992 }, { "epoch": 2.11, "learning_rate": 4.262851700915441e-06, "loss": 0.0078, "step": 8993 }, { "epoch": 2.11, "learning_rate": 4.260776104646571e-06, "loss": 0.0186, "step": 8994 }, { "epoch": 2.11, "learning_rate": 4.258700877006933e-06, "loss": 0.0169, "step": 8995 }, { "epoch": 2.11, "learning_rate": 4.25662601812982e-06, "loss": 0.0173, "step": 8996 }, { "epoch": 2.11, "learning_rate": 4.254551528148507e-06, "loss": 0.0002, "step": 8997 }, { "epoch": 2.11, "learning_rate": 4.252477407196232e-06, "loss": 0.0143, "step": 8998 }, { "epoch": 2.11, "learning_rate": 4.250403655406217e-06, "loss": 0.0223, "step": 8999 }, { "epoch": 2.11, "learning_rate": 4.248330272911654e-06, "loss": 0.0018, "step": 9000 }, { "epoch": 2.11, "learning_rate": 4.246257259845723e-06, "loss": 0.0478, "step": 9001 }, { "epoch": 2.11, "learning_rate": 4.244184616341569e-06, "loss": 0.0621, "step": 9002 }, { "epoch": 2.11, "learning_rate": 4.242112342532317e-06, "loss": 0.0388, "step": 9003 }, { "epoch": 2.11, "learning_rate": 4.240040438551072e-06, "loss": 0.0202, "step": 9004 }, { "epoch": 2.11, "learning_rate": 4.23796890453091e-06, "loss": 0.0016, "step": 9005 }, { "epoch": 2.11, "learning_rate": 4.235897740604888e-06, "loss": 0.0006, "step": 9006 }, { "epoch": 2.11, "learning_rate": 4.233826946906028e-06, "loss": 0.0229, "step": 9007 }, { "epoch": 2.11, "learning_rate": 4.231756523567347e-06, "loss": 0.015, "step": 9008 }, { "epoch": 2.11, "learning_rate": 4.229686470721822e-06, "loss": 0.0145, "step": 9009 }, { "epoch": 2.11, "learning_rate": 4.227616788502415e-06, "loss": 0.0115, "step": 9010 }, { "epoch": 2.11, "learning_rate": 4.225547477042056e-06, "loss": 0.006, "step": 9011 }, { "epoch": 2.12, "learning_rate": 4.223478536473661e-06, "loss": 0.0107, "step": 9012 }, { "epoch": 2.12, "learning_rate": 4.221409966930119e-06, "loss": 0.0024, "step": 9013 }, { "epoch": 2.12, "learning_rate": 4.219341768544293e-06, "loss": 0.0019, "step": 9014 }, { "epoch": 2.12, "learning_rate": 4.217273941449022e-06, "loss": 0.0117, "step": 9015 }, { "epoch": 2.12, "learning_rate": 4.215206485777117e-06, "loss": 0.0005, "step": 9016 }, { "epoch": 2.12, "learning_rate": 4.213139401661379e-06, "loss": 0.0196, "step": 9017 }, { "epoch": 2.12, "learning_rate": 4.211072689234574e-06, "loss": 0.0012, "step": 9018 }, { "epoch": 2.12, "learning_rate": 4.209006348629443e-06, "loss": 0.0008, "step": 9019 }, { "epoch": 2.12, "learning_rate": 4.206940379978707e-06, "loss": 0.0035, "step": 9020 }, { "epoch": 2.12, "learning_rate": 4.204874783415067e-06, "loss": 0.0662, "step": 9021 }, { "epoch": 2.12, "learning_rate": 4.202809559071193e-06, "loss": 0.0101, "step": 9022 }, { "epoch": 2.12, "learning_rate": 4.200744707079733e-06, "loss": 0.0134, "step": 9023 }, { "epoch": 2.12, "learning_rate": 4.198680227573315e-06, "loss": 0.0007, "step": 9024 }, { "epoch": 2.12, "learning_rate": 4.196616120684539e-06, "loss": 0.0008, "step": 9025 }, { "epoch": 2.12, "learning_rate": 4.194552386545982e-06, "loss": 0.0047, "step": 9026 }, { "epoch": 2.12, "learning_rate": 4.192489025290193e-06, "loss": 0.0283, "step": 9027 }, { "epoch": 2.12, "learning_rate": 4.190426037049708e-06, "loss": 0.002, "step": 9028 }, { "epoch": 2.12, "learning_rate": 4.188363421957031e-06, "loss": 0.0257, "step": 9029 }, { "epoch": 2.12, "learning_rate": 4.186301180144638e-06, "loss": 0.0011, "step": 9030 }, { "epoch": 2.12, "learning_rate": 4.184239311744994e-06, "loss": 0.0101, "step": 9031 }, { "epoch": 2.12, "learning_rate": 4.182177816890525e-06, "loss": 0.0089, "step": 9032 }, { "epoch": 2.12, "learning_rate": 4.180116695713647e-06, "loss": 0.014, "step": 9033 }, { "epoch": 2.12, "learning_rate": 4.1780559483467445e-06, "loss": 0.0409, "step": 9034 }, { "epoch": 2.12, "learning_rate": 4.175995574922175e-06, "loss": 0.0002, "step": 9035 }, { "epoch": 2.12, "learning_rate": 4.173935575572276e-06, "loss": 0.0304, "step": 9036 }, { "epoch": 2.12, "learning_rate": 4.171875950429365e-06, "loss": 0.0491, "step": 9037 }, { "epoch": 2.12, "learning_rate": 4.169816699625729e-06, "loss": 0.0401, "step": 9038 }, { "epoch": 2.12, "learning_rate": 4.16775782329363e-06, "loss": 0.003, "step": 9039 }, { "epoch": 2.12, "learning_rate": 4.165699321565317e-06, "loss": 0.0006, "step": 9040 }, { "epoch": 2.12, "learning_rate": 4.163641194573e-06, "loss": 0.0126, "step": 9041 }, { "epoch": 2.12, "learning_rate": 4.1615834424488775e-06, "loss": 0.0156, "step": 9042 }, { "epoch": 2.12, "learning_rate": 4.159526065325111e-06, "loss": 0.0006, "step": 9043 }, { "epoch": 2.12, "learning_rate": 4.157469063333854e-06, "loss": 0.006, "step": 9044 }, { "epoch": 2.12, "learning_rate": 4.155412436607224e-06, "loss": 0.0051, "step": 9045 }, { "epoch": 2.12, "learning_rate": 4.153356185277314e-06, "loss": 0.0012, "step": 9046 }, { "epoch": 2.12, "learning_rate": 4.1513003094762025e-06, "loss": 0.0134, "step": 9047 }, { "epoch": 2.12, "learning_rate": 4.149244809335933e-06, "loss": 0.0089, "step": 9048 }, { "epoch": 2.12, "learning_rate": 4.147189684988538e-06, "loss": 0.0008, "step": 9049 }, { "epoch": 2.12, "learning_rate": 4.145134936566007e-06, "loss": 0.0099, "step": 9050 }, { "epoch": 2.12, "learning_rate": 4.143080564200323e-06, "loss": 0.0011, "step": 9051 }, { "epoch": 2.12, "learning_rate": 4.141026568023433e-06, "loss": 0.004, "step": 9052 }, { "epoch": 2.12, "learning_rate": 4.1389729481672714e-06, "loss": 0.0038, "step": 9053 }, { "epoch": 2.12, "learning_rate": 4.136919704763739e-06, "loss": 0.0027, "step": 9054 }, { "epoch": 2.13, "learning_rate": 4.13486683794471e-06, "loss": 0.0181, "step": 9055 }, { "epoch": 2.13, "learning_rate": 4.132814347842048e-06, "loss": 0.0038, "step": 9056 }, { "epoch": 2.13, "learning_rate": 4.1307622345875795e-06, "loss": 0.0226, "step": 9057 }, { "epoch": 2.13, "learning_rate": 4.128710498313113e-06, "loss": 0.0047, "step": 9058 }, { "epoch": 2.13, "learning_rate": 4.126659139150426e-06, "loss": 0.0555, "step": 9059 }, { "epoch": 2.13, "learning_rate": 4.124608157231285e-06, "loss": 0.0026, "step": 9060 }, { "epoch": 2.13, "learning_rate": 4.122557552687421e-06, "loss": 0.0006, "step": 9061 }, { "epoch": 2.13, "learning_rate": 4.120507325650542e-06, "loss": 0.0291, "step": 9062 }, { "epoch": 2.13, "learning_rate": 4.118457476252331e-06, "loss": 0.0154, "step": 9063 }, { "epoch": 2.13, "learning_rate": 4.116408004624454e-06, "loss": 0.0012, "step": 9064 }, { "epoch": 2.13, "learning_rate": 4.114358910898554e-06, "loss": 0.0303, "step": 9065 }, { "epoch": 2.13, "learning_rate": 4.112310195206229e-06, "loss": 0.005, "step": 9066 }, { "epoch": 2.13, "learning_rate": 4.110261857679082e-06, "loss": 0.0126, "step": 9067 }, { "epoch": 2.13, "learning_rate": 4.108213898448667e-06, "loss": 0.0038, "step": 9068 }, { "epoch": 2.13, "learning_rate": 4.10616631764653e-06, "loss": 0.0013, "step": 9069 }, { "epoch": 2.13, "learning_rate": 4.104119115404187e-06, "loss": 0.0016, "step": 9070 }, { "epoch": 2.13, "learning_rate": 4.102072291853126e-06, "loss": 0.0041, "step": 9071 }, { "epoch": 2.13, "learning_rate": 4.100025847124814e-06, "loss": 0.0363, "step": 9072 }, { "epoch": 2.13, "learning_rate": 4.097979781350696e-06, "loss": 0.0027, "step": 9073 }, { "epoch": 2.13, "learning_rate": 4.095934094662192e-06, "loss": 0.0004, "step": 9074 }, { "epoch": 2.13, "learning_rate": 4.093888787190688e-06, "loss": 0.0475, "step": 9075 }, { "epoch": 2.13, "learning_rate": 4.091843859067565e-06, "loss": 0.0004, "step": 9076 }, { "epoch": 2.13, "learning_rate": 4.089799310424161e-06, "loss": 0.049, "step": 9077 }, { "epoch": 2.13, "learning_rate": 4.087755141391799e-06, "loss": 0.0014, "step": 9078 }, { "epoch": 2.13, "learning_rate": 4.085711352101772e-06, "loss": 0.01, "step": 9079 }, { "epoch": 2.13, "learning_rate": 4.08366794268536e-06, "loss": 0.0109, "step": 9080 }, { "epoch": 2.13, "learning_rate": 4.081624913273805e-06, "loss": 0.0075, "step": 9081 }, { "epoch": 2.13, "learning_rate": 4.079582263998327e-06, "loss": 0.0681, "step": 9082 }, { "epoch": 2.13, "learning_rate": 4.077539994990135e-06, "loss": 0.038, "step": 9083 }, { "epoch": 2.13, "learning_rate": 4.075498106380395e-06, "loss": 0.0017, "step": 9084 }, { "epoch": 2.13, "learning_rate": 4.073456598300266e-06, "loss": 0.0018, "step": 9085 }, { "epoch": 2.13, "learning_rate": 4.071415470880861e-06, "loss": 0.0018, "step": 9086 }, { "epoch": 2.13, "learning_rate": 4.069374724253291e-06, "loss": 0.0256, "step": 9087 }, { "epoch": 2.13, "learning_rate": 4.067334358548627e-06, "loss": 0.0152, "step": 9088 }, { "epoch": 2.13, "learning_rate": 4.0652943738979274e-06, "loss": 0.0341, "step": 9089 }, { "epoch": 2.13, "learning_rate": 4.063254770432218e-06, "loss": 0.001, "step": 9090 }, { "epoch": 2.13, "learning_rate": 4.061215548282497e-06, "loss": 0.0281, "step": 9091 }, { "epoch": 2.13, "learning_rate": 4.0591767075797495e-06, "loss": 0.0169, "step": 9092 }, { "epoch": 2.13, "learning_rate": 4.057138248454928e-06, "loss": 0.0363, "step": 9093 }, { "epoch": 2.13, "learning_rate": 4.055100171038963e-06, "loss": 0.0012, "step": 9094 }, { "epoch": 2.13, "learning_rate": 4.053062475462753e-06, "loss": 0.0007, "step": 9095 }, { "epoch": 2.13, "learning_rate": 4.05102516185719e-06, "loss": 0.0213, "step": 9096 }, { "epoch": 2.14, "learning_rate": 4.048988230353123e-06, "loss": 0.0077, "step": 9097 }, { "epoch": 2.14, "learning_rate": 4.046951681081384e-06, "loss": 0.068, "step": 9098 }, { "epoch": 2.14, "learning_rate": 4.044915514172782e-06, "loss": 0.0227, "step": 9099 }, { "epoch": 2.14, "learning_rate": 4.042879729758098e-06, "loss": 0.0108, "step": 9100 }, { "epoch": 2.14, "learning_rate": 4.0408443279680976e-06, "loss": 0.0003, "step": 9101 }, { "epoch": 2.14, "learning_rate": 4.038809308933501e-06, "loss": 0.0024, "step": 9102 }, { "epoch": 2.14, "learning_rate": 4.036774672785027e-06, "loss": 0.0069, "step": 9103 }, { "epoch": 2.14, "learning_rate": 4.0347404196533545e-06, "loss": 0.0697, "step": 9104 }, { "epoch": 2.14, "learning_rate": 4.032706549669149e-06, "loss": 0.0703, "step": 9105 }, { "epoch": 2.14, "learning_rate": 4.030673062963041e-06, "loss": 0.0027, "step": 9106 }, { "epoch": 2.14, "learning_rate": 4.028639959665638e-06, "loss": 0.0024, "step": 9107 }, { "epoch": 2.14, "learning_rate": 4.026607239907535e-06, "loss": 0.0255, "step": 9108 }, { "epoch": 2.14, "learning_rate": 4.024574903819287e-06, "loss": 0.0293, "step": 9109 }, { "epoch": 2.14, "learning_rate": 4.0225429515314315e-06, "loss": 0.0738, "step": 9110 }, { "epoch": 2.14, "learning_rate": 4.020511383174478e-06, "loss": 0.0053, "step": 9111 }, { "epoch": 2.14, "learning_rate": 4.018480198878919e-06, "loss": 0.0061, "step": 9112 }, { "epoch": 2.14, "learning_rate": 4.016449398775215e-06, "loss": 0.0438, "step": 9113 }, { "epoch": 2.14, "learning_rate": 4.014418982993803e-06, "loss": 0.02, "step": 9114 }, { "epoch": 2.14, "learning_rate": 4.012388951665093e-06, "loss": 0.0073, "step": 9115 }, { "epoch": 2.14, "learning_rate": 4.010359304919476e-06, "loss": 0.0006, "step": 9116 }, { "epoch": 2.14, "learning_rate": 4.008330042887325e-06, "loss": 0.0024, "step": 9117 }, { "epoch": 2.14, "learning_rate": 4.006301165698962e-06, "loss": 0.0004, "step": 9118 }, { "epoch": 2.14, "learning_rate": 4.004272673484716e-06, "loss": 0.0978, "step": 9119 }, { "epoch": 2.14, "learning_rate": 4.002244566374867e-06, "loss": 0.0121, "step": 9120 }, { "epoch": 2.14, "learning_rate": 4.00021684449969e-06, "loss": 0.0102, "step": 9121 }, { "epoch": 2.14, "learning_rate": 3.998189507989413e-06, "loss": 0.0534, "step": 9122 }, { "epoch": 2.14, "learning_rate": 3.99616255697426e-06, "loss": 0.0089, "step": 9123 }, { "epoch": 2.14, "learning_rate": 3.994135991584416e-06, "loss": 0.0231, "step": 9124 }, { "epoch": 2.14, "learning_rate": 3.992109811950055e-06, "loss": 0.0711, "step": 9125 }, { "epoch": 2.14, "learning_rate": 3.990084018201311e-06, "loss": 0.0611, "step": 9126 }, { "epoch": 2.14, "learning_rate": 3.9880586104682995e-06, "loss": 0.0237, "step": 9127 }, { "epoch": 2.14, "learning_rate": 3.986033588881118e-06, "loss": 0.0009, "step": 9128 }, { "epoch": 2.14, "learning_rate": 3.984008953569832e-06, "loss": 0.0579, "step": 9129 }, { "epoch": 2.14, "learning_rate": 3.981984704664481e-06, "loss": 0.0118, "step": 9130 }, { "epoch": 2.14, "learning_rate": 3.97996084229508e-06, "loss": 0.0018, "step": 9131 }, { "epoch": 2.14, "learning_rate": 3.977937366591624e-06, "loss": 0.0379, "step": 9132 }, { "epoch": 2.14, "learning_rate": 3.975914277684085e-06, "loss": 0.0031, "step": 9133 }, { "epoch": 2.14, "learning_rate": 3.973891575702397e-06, "loss": 0.001, "step": 9134 }, { "epoch": 2.14, "learning_rate": 3.971869260776484e-06, "loss": 0.0011, "step": 9135 }, { "epoch": 2.14, "learning_rate": 3.9698473330362345e-06, "loss": 0.0082, "step": 9136 }, { "epoch": 2.14, "learning_rate": 3.967825792611525e-06, "loss": 0.0769, "step": 9137 }, { "epoch": 2.14, "learning_rate": 3.965804639632186e-06, "loss": 0.0097, "step": 9138 }, { "epoch": 2.14, "learning_rate": 3.963783874228046e-06, "loss": 0.0281, "step": 9139 }, { "epoch": 2.15, "learning_rate": 3.9617634965288895e-06, "loss": 0.0027, "step": 9140 }, { "epoch": 2.15, "learning_rate": 3.959743506664495e-06, "loss": 0.0136, "step": 9141 }, { "epoch": 2.15, "learning_rate": 3.957723904764601e-06, "loss": 0.0076, "step": 9142 }, { "epoch": 2.15, "learning_rate": 3.955704690958923e-06, "loss": 0.0254, "step": 9143 }, { "epoch": 2.15, "learning_rate": 3.9536858653771616e-06, "loss": 0.0344, "step": 9144 }, { "epoch": 2.15, "learning_rate": 3.951667428148982e-06, "loss": 0.0194, "step": 9145 }, { "epoch": 2.15, "learning_rate": 3.949649379404027e-06, "loss": 0.0358, "step": 9146 }, { "epoch": 2.15, "learning_rate": 3.947631719271914e-06, "loss": 0.0023, "step": 9147 }, { "epoch": 2.15, "learning_rate": 3.945614447882242e-06, "loss": 0.0201, "step": 9148 }, { "epoch": 2.15, "learning_rate": 3.943597565364578e-06, "loss": 0.0067, "step": 9149 }, { "epoch": 2.15, "learning_rate": 3.941581071848463e-06, "loss": 0.0019, "step": 9150 }, { "epoch": 2.15, "learning_rate": 3.939564967463419e-06, "loss": 0.0232, "step": 9151 }, { "epoch": 2.15, "learning_rate": 3.937549252338938e-06, "loss": 0.0003, "step": 9152 }, { "epoch": 2.15, "learning_rate": 3.935533926604498e-06, "loss": 0.0022, "step": 9153 }, { "epoch": 2.15, "learning_rate": 3.933518990389527e-06, "loss": 0.0561, "step": 9154 }, { "epoch": 2.15, "learning_rate": 3.9315044438234555e-06, "loss": 0.0167, "step": 9155 }, { "epoch": 2.15, "learning_rate": 3.929490287035671e-06, "loss": 0.0037, "step": 9156 }, { "epoch": 2.15, "learning_rate": 3.927476520155552e-06, "loss": 0.0013, "step": 9157 }, { "epoch": 2.15, "learning_rate": 3.92546314331243e-06, "loss": 0.0615, "step": 9158 }, { "epoch": 2.15, "learning_rate": 3.923450156635629e-06, "loss": 0.0125, "step": 9159 }, { "epoch": 2.15, "learning_rate": 3.921437560254447e-06, "loss": 0.0025, "step": 9160 }, { "epoch": 2.15, "learning_rate": 3.91942535429815e-06, "loss": 0.0105, "step": 9161 }, { "epoch": 2.15, "learning_rate": 3.91741353889598e-06, "loss": 0.0293, "step": 9162 }, { "epoch": 2.15, "learning_rate": 3.915402114177152e-06, "loss": 0.0002, "step": 9163 }, { "epoch": 2.15, "learning_rate": 3.913391080270869e-06, "loss": 0.0164, "step": 9164 }, { "epoch": 2.15, "learning_rate": 3.911380437306293e-06, "loss": 0.0015, "step": 9165 }, { "epoch": 2.15, "learning_rate": 3.909370185412564e-06, "loss": 0.0694, "step": 9166 }, { "epoch": 2.15, "learning_rate": 3.90736032471881e-06, "loss": 0.0409, "step": 9167 }, { "epoch": 2.15, "learning_rate": 3.905350855354113e-06, "loss": 0.0176, "step": 9168 }, { "epoch": 2.15, "learning_rate": 3.903341777447553e-06, "loss": 0.0329, "step": 9169 }, { "epoch": 2.15, "learning_rate": 3.901333091128159e-06, "loss": 0.006, "step": 9170 }, { "epoch": 2.15, "learning_rate": 3.8993247965249595e-06, "loss": 0.0004, "step": 9171 }, { "epoch": 2.15, "learning_rate": 3.8973168937669385e-06, "loss": 0.0033, "step": 9172 }, { "epoch": 2.15, "learning_rate": 3.895309382983074e-06, "loss": 0.0124, "step": 9173 }, { "epoch": 2.15, "learning_rate": 3.893302264302295e-06, "loss": 0.0064, "step": 9174 }, { "epoch": 2.15, "learning_rate": 3.891295537853525e-06, "loss": 0.0018, "step": 9175 }, { "epoch": 2.15, "learning_rate": 3.8892892037656594e-06, "loss": 0.0365, "step": 9176 }, { "epoch": 2.15, "learning_rate": 3.88728326216756e-06, "loss": 0.0002, "step": 9177 }, { "epoch": 2.15, "learning_rate": 3.885277713188068e-06, "loss": 0.02, "step": 9178 }, { "epoch": 2.15, "learning_rate": 3.883272556955997e-06, "loss": 0.0075, "step": 9179 }, { "epoch": 2.15, "learning_rate": 3.881267793600146e-06, "loss": 0.0022, "step": 9180 }, { "epoch": 2.15, "learning_rate": 3.879263423249274e-06, "loss": 0.0234, "step": 9181 }, { "epoch": 2.15, "learning_rate": 3.877259446032123e-06, "loss": 0.171, "step": 9182 }, { "epoch": 2.16, "learning_rate": 3.875255862077403e-06, "loss": 0.0595, "step": 9183 }, { "epoch": 2.16, "learning_rate": 3.87325267151381e-06, "loss": 0.0253, "step": 9184 }, { "epoch": 2.16, "learning_rate": 3.871249874470013e-06, "loss": 0.0636, "step": 9185 }, { "epoch": 2.16, "learning_rate": 3.86924747107464e-06, "loss": 0.0223, "step": 9186 }, { "epoch": 2.16, "learning_rate": 3.867245461456314e-06, "loss": 0.0011, "step": 9187 }, { "epoch": 2.16, "learning_rate": 3.865243845743616e-06, "loss": 0.0403, "step": 9188 }, { "epoch": 2.16, "learning_rate": 3.86324262406512e-06, "loss": 0.0135, "step": 9189 }, { "epoch": 2.16, "learning_rate": 3.861241796549352e-06, "loss": 0.01, "step": 9190 }, { "epoch": 2.16, "learning_rate": 3.8592413633248325e-06, "loss": 0.0014, "step": 9191 }, { "epoch": 2.16, "learning_rate": 3.857241324520043e-06, "loss": 0.008, "step": 9192 }, { "epoch": 2.16, "learning_rate": 3.855241680263455e-06, "loss": 0.0618, "step": 9193 }, { "epoch": 2.16, "learning_rate": 3.853242430683498e-06, "loss": 0.0615, "step": 9194 }, { "epoch": 2.16, "learning_rate": 3.851243575908582e-06, "loss": 0.007, "step": 9195 }, { "epoch": 2.16, "learning_rate": 3.8492451160670995e-06, "loss": 0.0468, "step": 9196 }, { "epoch": 2.16, "learning_rate": 3.847247051287408e-06, "loss": 0.0325, "step": 9197 }, { "epoch": 2.16, "learning_rate": 3.845249381697843e-06, "loss": 0.0238, "step": 9198 }, { "epoch": 2.16, "learning_rate": 3.843252107426711e-06, "loss": 0.0014, "step": 9199 }, { "epoch": 2.16, "learning_rate": 3.841255228602301e-06, "loss": 0.0299, "step": 9200 }, { "epoch": 2.16, "learning_rate": 3.839258745352872e-06, "loss": 0.0229, "step": 9201 }, { "epoch": 2.16, "learning_rate": 3.837262657806653e-06, "loss": 0.0182, "step": 9202 }, { "epoch": 2.16, "learning_rate": 3.835266966091859e-06, "loss": 0.0008, "step": 9203 }, { "epoch": 2.16, "learning_rate": 3.833271670336667e-06, "loss": 0.0228, "step": 9204 }, { "epoch": 2.16, "learning_rate": 3.8312767706692436e-06, "loss": 0.0031, "step": 9205 }, { "epoch": 2.16, "learning_rate": 3.829282267217706e-06, "loss": 0.0028, "step": 9206 }, { "epoch": 2.16, "learning_rate": 3.827288160110175e-06, "loss": 0.0109, "step": 9207 }, { "epoch": 2.16, "learning_rate": 3.8252944494747214e-06, "loss": 0.0028, "step": 9208 }, { "epoch": 2.16, "learning_rate": 3.823301135439409e-06, "loss": 0.0134, "step": 9209 }, { "epoch": 2.16, "learning_rate": 3.8213082181322635e-06, "loss": 0.0041, "step": 9210 }, { "epoch": 2.16, "learning_rate": 3.819315697681287e-06, "loss": 0.0068, "step": 9211 }, { "epoch": 2.16, "learning_rate": 3.817323574214465e-06, "loss": 0.0103, "step": 9212 }, { "epoch": 2.16, "learning_rate": 3.81533184785975e-06, "loss": 0.0478, "step": 9213 }, { "epoch": 2.16, "learning_rate": 3.813340518745067e-06, "loss": 0.0019, "step": 9214 }, { "epoch": 2.16, "learning_rate": 3.8113495869983176e-06, "loss": 0.0004, "step": 9215 }, { "epoch": 2.16, "learning_rate": 3.8093590527473846e-06, "loss": 0.0055, "step": 9216 }, { "epoch": 2.16, "learning_rate": 3.8073689161201157e-06, "loss": 0.0016, "step": 9217 }, { "epoch": 2.16, "learning_rate": 3.805379177244335e-06, "loss": 0.0309, "step": 9218 }, { "epoch": 2.16, "learning_rate": 3.8033898362478493e-06, "loss": 0.0305, "step": 9219 }, { "epoch": 2.16, "learning_rate": 3.8014008932584267e-06, "loss": 0.0104, "step": 9220 }, { "epoch": 2.16, "learning_rate": 3.799412348403827e-06, "loss": 0.001, "step": 9221 }, { "epoch": 2.16, "learning_rate": 3.7974242018117602e-06, "loss": 0.0352, "step": 9222 }, { "epoch": 2.16, "learning_rate": 3.7954364536099354e-06, "loss": 0.0369, "step": 9223 }, { "epoch": 2.16, "learning_rate": 3.793449103926017e-06, "loss": 0.0345, "step": 9224 }, { "epoch": 2.17, "learning_rate": 3.791462152887664e-06, "loss": 0.0022, "step": 9225 }, { "epoch": 2.17, "learning_rate": 3.789475600622483e-06, "loss": 0.0004, "step": 9226 }, { "epoch": 2.17, "learning_rate": 3.787489447258077e-06, "loss": 0.0159, "step": 9227 }, { "epoch": 2.17, "learning_rate": 3.78550369292202e-06, "loss": 0.0007, "step": 9228 }, { "epoch": 2.17, "learning_rate": 3.7835183377418537e-06, "loss": 0.018, "step": 9229 }, { "epoch": 2.17, "learning_rate": 3.781533381845096e-06, "loss": 0.0033, "step": 9230 }, { "epoch": 2.17, "learning_rate": 3.7795488253592372e-06, "loss": 0.0105, "step": 9231 }, { "epoch": 2.17, "learning_rate": 3.777564668411752e-06, "loss": 0.0281, "step": 9232 }, { "epoch": 2.17, "learning_rate": 3.775580911130078e-06, "loss": 0.0245, "step": 9233 }, { "epoch": 2.17, "learning_rate": 3.7735975536416335e-06, "loss": 0.0002, "step": 9234 }, { "epoch": 2.17, "learning_rate": 3.7716145960738037e-06, "loss": 0.0006, "step": 9235 }, { "epoch": 2.17, "learning_rate": 3.769632038553962e-06, "loss": 0.003, "step": 9236 }, { "epoch": 2.17, "learning_rate": 3.767649881209443e-06, "loss": 0.0006, "step": 9237 }, { "epoch": 2.17, "learning_rate": 3.7656681241675586e-06, "loss": 0.0008, "step": 9238 }, { "epoch": 2.17, "learning_rate": 3.763686767555601e-06, "loss": 0.0005, "step": 9239 }, { "epoch": 2.17, "learning_rate": 3.7617058115008276e-06, "loss": 0.0366, "step": 9240 }, { "epoch": 2.17, "learning_rate": 3.759725256130484e-06, "loss": 0.0096, "step": 9241 }, { "epoch": 2.17, "learning_rate": 3.757745101571768e-06, "loss": 0.0271, "step": 9242 }, { "epoch": 2.17, "learning_rate": 3.755765347951875e-06, "loss": 0.0014, "step": 9243 }, { "epoch": 2.17, "learning_rate": 3.753785995397956e-06, "loss": 0.0445, "step": 9244 }, { "epoch": 2.17, "learning_rate": 3.7518070440371512e-06, "loss": 0.001, "step": 9245 }, { "epoch": 2.17, "learning_rate": 3.749828493996567e-06, "loss": 0.0045, "step": 9246 }, { "epoch": 2.17, "learning_rate": 3.74785034540328e-06, "loss": 0.0022, "step": 9247 }, { "epoch": 2.17, "learning_rate": 3.7458725983843534e-06, "loss": 0.0346, "step": 9248 }, { "epoch": 2.17, "learning_rate": 3.7438952530668147e-06, "loss": 0.0055, "step": 9249 }, { "epoch": 2.17, "learning_rate": 3.741918309577668e-06, "loss": 0.0231, "step": 9250 }, { "epoch": 2.17, "learning_rate": 3.739941768043889e-06, "loss": 0.0105, "step": 9251 }, { "epoch": 2.17, "learning_rate": 3.737965628592436e-06, "loss": 0.006, "step": 9252 }, { "epoch": 2.17, "learning_rate": 3.7359898913502347e-06, "loss": 0.0758, "step": 9253 }, { "epoch": 2.17, "learning_rate": 3.734014556444181e-06, "loss": 0.0062, "step": 9254 }, { "epoch": 2.17, "learning_rate": 3.7320396240011593e-06, "loss": 0.0253, "step": 9255 }, { "epoch": 2.17, "learning_rate": 3.73006509414801e-06, "loss": 0.009, "step": 9256 }, { "epoch": 2.17, "learning_rate": 3.7280909670115674e-06, "loss": 0.0017, "step": 9257 }, { "epoch": 2.17, "learning_rate": 3.7261172427186175e-06, "loss": 0.002, "step": 9258 }, { "epoch": 2.17, "learning_rate": 3.724143921395941e-06, "loss": 0.0058, "step": 9259 }, { "epoch": 2.17, "learning_rate": 3.722171003170276e-06, "loss": 0.0197, "step": 9260 }, { "epoch": 2.17, "learning_rate": 3.7201984881683516e-06, "loss": 0.0004, "step": 9261 }, { "epoch": 2.17, "learning_rate": 3.7182263765168593e-06, "loss": 0.0067, "step": 9262 }, { "epoch": 2.17, "learning_rate": 3.7162546683424607e-06, "loss": 0.0029, "step": 9263 }, { "epoch": 2.17, "learning_rate": 3.714283363771808e-06, "loss": 0.0003, "step": 9264 }, { "epoch": 2.17, "learning_rate": 3.7123124629315123e-06, "loss": 0.0163, "step": 9265 }, { "epoch": 2.17, "learning_rate": 3.7103419659481652e-06, "loss": 0.0713, "step": 9266 }, { "epoch": 2.17, "learning_rate": 3.7083718729483287e-06, "loss": 0.0004, "step": 9267 }, { "epoch": 2.18, "learning_rate": 3.7064021840585474e-06, "loss": 0.0067, "step": 9268 }, { "epoch": 2.18, "learning_rate": 3.7044328994053303e-06, "loss": 0.0038, "step": 9269 }, { "epoch": 2.18, "learning_rate": 3.7024640191151606e-06, "loss": 0.0257, "step": 9270 }, { "epoch": 2.18, "learning_rate": 3.700495543314507e-06, "loss": 0.0168, "step": 9271 }, { "epoch": 2.18, "learning_rate": 3.6985274721298006e-06, "loss": 0.0096, "step": 9272 }, { "epoch": 2.18, "learning_rate": 3.6965598056874495e-06, "loss": 0.0085, "step": 9273 }, { "epoch": 2.18, "learning_rate": 3.6945925441138343e-06, "loss": 0.0156, "step": 9274 }, { "epoch": 2.18, "learning_rate": 3.6926256875353173e-06, "loss": 0.0591, "step": 9275 }, { "epoch": 2.18, "learning_rate": 3.6906592360782235e-06, "loss": 0.001, "step": 9276 }, { "epoch": 2.18, "learning_rate": 3.688693189868867e-06, "loss": 0.0067, "step": 9277 }, { "epoch": 2.18, "learning_rate": 3.6867275490335142e-06, "loss": 0.0172, "step": 9278 }, { "epoch": 2.18, "learning_rate": 3.6847623136984247e-06, "loss": 0.0016, "step": 9279 }, { "epoch": 2.18, "learning_rate": 3.682797483989826e-06, "loss": 0.0059, "step": 9280 }, { "epoch": 2.18, "learning_rate": 3.6808330600339182e-06, "loss": 0.0032, "step": 9281 }, { "epoch": 2.18, "learning_rate": 3.678869041956875e-06, "loss": 0.0364, "step": 9282 }, { "epoch": 2.18, "learning_rate": 3.6769054298848407e-06, "loss": 0.013, "step": 9283 }, { "epoch": 2.18, "learning_rate": 3.674942223943946e-06, "loss": 0.0057, "step": 9284 }, { "epoch": 2.18, "learning_rate": 3.6729794242602824e-06, "loss": 0.0174, "step": 9285 }, { "epoch": 2.18, "learning_rate": 3.6710170309599215e-06, "loss": 0.0079, "step": 9286 }, { "epoch": 2.18, "learning_rate": 3.6690550441689023e-06, "loss": 0.0049, "step": 9287 }, { "epoch": 2.18, "learning_rate": 3.6670934640132506e-06, "loss": 0.0148, "step": 9288 }, { "epoch": 2.18, "learning_rate": 3.6651322906189557e-06, "loss": 0.0034, "step": 9289 }, { "epoch": 2.18, "learning_rate": 3.663171524111979e-06, "loss": 0.0143, "step": 9290 }, { "epoch": 2.18, "learning_rate": 3.6612111646182668e-06, "loss": 0.0222, "step": 9291 }, { "epoch": 2.18, "learning_rate": 3.6592512122637304e-06, "loss": 0.0045, "step": 9292 }, { "epoch": 2.18, "learning_rate": 3.6572916671742555e-06, "loss": 0.0223, "step": 9293 }, { "epoch": 2.18, "learning_rate": 3.6553325294757015e-06, "loss": 0.0089, "step": 9294 }, { "epoch": 2.18, "learning_rate": 3.6533737992939043e-06, "loss": 0.0114, "step": 9295 }, { "epoch": 2.18, "learning_rate": 3.6514154767546794e-06, "loss": 0.0127, "step": 9296 }, { "epoch": 2.18, "learning_rate": 3.6494575619838047e-06, "loss": 0.001, "step": 9297 }, { "epoch": 2.18, "learning_rate": 3.6475000551070363e-06, "loss": 0.027, "step": 9298 }, { "epoch": 2.18, "learning_rate": 3.645542956250101e-06, "loss": 0.0324, "step": 9299 }, { "epoch": 2.18, "learning_rate": 3.643586265538711e-06, "loss": 0.0021, "step": 9300 }, { "epoch": 2.18, "learning_rate": 3.641629983098539e-06, "loss": 0.0184, "step": 9301 }, { "epoch": 2.18, "learning_rate": 3.639674109055238e-06, "loss": 0.0009, "step": 9302 }, { "epoch": 2.18, "learning_rate": 3.637718643534429e-06, "loss": 0.0011, "step": 9303 }, { "epoch": 2.18, "learning_rate": 3.635763586661718e-06, "loss": 0.0038, "step": 9304 }, { "epoch": 2.18, "learning_rate": 3.6338089385626762e-06, "loss": 0.009, "step": 9305 }, { "epoch": 2.18, "learning_rate": 3.6318546993628444e-06, "loss": 0.0077, "step": 9306 }, { "epoch": 2.18, "learning_rate": 3.6299008691877523e-06, "loss": 0.0231, "step": 9307 }, { "epoch": 2.18, "learning_rate": 3.6279474481628884e-06, "loss": 0.0206, "step": 9308 }, { "epoch": 2.18, "learning_rate": 3.6259944364137225e-06, "loss": 0.0042, "step": 9309 }, { "epoch": 2.18, "learning_rate": 3.6240418340656902e-06, "loss": 0.0522, "step": 9310 }, { "epoch": 2.19, "learning_rate": 3.6220896412442166e-06, "loss": 0.0284, "step": 9311 }, { "epoch": 2.19, "learning_rate": 3.6201378580746814e-06, "loss": 0.0011, "step": 9312 }, { "epoch": 2.19, "learning_rate": 3.6181864846824553e-06, "loss": 0.0111, "step": 9313 }, { "epoch": 2.19, "learning_rate": 3.6162355211928713e-06, "loss": 0.0002, "step": 9314 }, { "epoch": 2.19, "learning_rate": 3.6142849677312364e-06, "loss": 0.0029, "step": 9315 }, { "epoch": 2.19, "learning_rate": 3.6123348244228395e-06, "loss": 0.006, "step": 9316 }, { "epoch": 2.19, "learning_rate": 3.610385091392935e-06, "loss": 0.0189, "step": 9317 }, { "epoch": 2.19, "learning_rate": 3.6084357687667547e-06, "loss": 0.0037, "step": 9318 }, { "epoch": 2.19, "learning_rate": 3.6064868566695e-06, "loss": 0.0156, "step": 9319 }, { "epoch": 2.19, "learning_rate": 3.6045383552263546e-06, "loss": 0.0298, "step": 9320 }, { "epoch": 2.19, "learning_rate": 3.6025902645624687e-06, "loss": 0.0124, "step": 9321 }, { "epoch": 2.19, "learning_rate": 3.6006425848029624e-06, "loss": 0.0038, "step": 9322 }, { "epoch": 2.19, "learning_rate": 3.598695316072943e-06, "loss": 0.0098, "step": 9323 }, { "epoch": 2.19, "learning_rate": 3.5967484584974786e-06, "loss": 0.0295, "step": 9324 }, { "epoch": 2.19, "learning_rate": 3.594802012201617e-06, "loss": 0.0007, "step": 9325 }, { "epoch": 2.19, "learning_rate": 3.5928559773103743e-06, "loss": 0.0032, "step": 9326 }, { "epoch": 2.19, "learning_rate": 3.59091035394875e-06, "loss": 0.0206, "step": 9327 }, { "epoch": 2.19, "learning_rate": 3.5889651422417083e-06, "loss": 0.0293, "step": 9328 }, { "epoch": 2.19, "learning_rate": 3.587020342314189e-06, "loss": 0.0253, "step": 9329 }, { "epoch": 2.19, "learning_rate": 3.5850759542911028e-06, "loss": 0.0019, "step": 9330 }, { "epoch": 2.19, "learning_rate": 3.5831319782973427e-06, "loss": 0.0055, "step": 9331 }, { "epoch": 2.19, "learning_rate": 3.581188414457771e-06, "loss": 0.017, "step": 9332 }, { "epoch": 2.19, "learning_rate": 3.5792452628972205e-06, "loss": 0.0011, "step": 9333 }, { "epoch": 2.19, "learning_rate": 3.577302523740499e-06, "loss": 0.0046, "step": 9334 }, { "epoch": 2.19, "learning_rate": 3.575360197112384e-06, "loss": 0.0137, "step": 9335 }, { "epoch": 2.19, "learning_rate": 3.5734182831376398e-06, "loss": 0.0146, "step": 9336 }, { "epoch": 2.19, "learning_rate": 3.571476781940989e-06, "loss": 0.024, "step": 9337 }, { "epoch": 2.19, "learning_rate": 3.5695356936471316e-06, "loss": 0.0041, "step": 9338 }, { "epoch": 2.19, "learning_rate": 3.5675950183807505e-06, "loss": 0.0249, "step": 9339 }, { "epoch": 2.19, "learning_rate": 3.5656547562664925e-06, "loss": 0.0001, "step": 9340 }, { "epoch": 2.19, "learning_rate": 3.563714907428978e-06, "loss": 0.017, "step": 9341 }, { "epoch": 2.19, "learning_rate": 3.5617754719928e-06, "loss": 0.0116, "step": 9342 }, { "epoch": 2.19, "learning_rate": 3.5598364500825367e-06, "loss": 0.0004, "step": 9343 }, { "epoch": 2.19, "learning_rate": 3.557897841822726e-06, "loss": 0.0057, "step": 9344 }, { "epoch": 2.19, "learning_rate": 3.5559596473378854e-06, "loss": 0.0187, "step": 9345 }, { "epoch": 2.19, "learning_rate": 3.5540218667525008e-06, "loss": 0.0074, "step": 9346 }, { "epoch": 2.19, "learning_rate": 3.5520845001910385e-06, "loss": 0.0655, "step": 9347 }, { "epoch": 2.19, "learning_rate": 3.550147547777939e-06, "loss": 0.0377, "step": 9348 }, { "epoch": 2.19, "learning_rate": 3.5482110096376098e-06, "loss": 0.0063, "step": 9349 }, { "epoch": 2.19, "learning_rate": 3.546274885894433e-06, "loss": 0.0202, "step": 9350 }, { "epoch": 2.19, "learning_rate": 3.544339176672763e-06, "loss": 0.0012, "step": 9351 }, { "epoch": 2.19, "learning_rate": 3.5424038820969366e-06, "loss": 0.0221, "step": 9352 }, { "epoch": 2.2, "learning_rate": 3.5404690022912525e-06, "loss": 0.0416, "step": 9353 }, { "epoch": 2.2, "learning_rate": 3.5385345373799907e-06, "loss": 0.0012, "step": 9354 }, { "epoch": 2.2, "learning_rate": 3.536600487487395e-06, "loss": 0.042, "step": 9355 }, { "epoch": 2.2, "learning_rate": 3.534666852737698e-06, "loss": 0.0054, "step": 9356 }, { "epoch": 2.2, "learning_rate": 3.532733633255092e-06, "loss": 0.0062, "step": 9357 }, { "epoch": 2.2, "learning_rate": 3.530800829163745e-06, "loss": 0.0033, "step": 9358 }, { "epoch": 2.2, "learning_rate": 3.528868440587807e-06, "loss": 0.0099, "step": 9359 }, { "epoch": 2.2, "learning_rate": 3.5269364676513907e-06, "loss": 0.0008, "step": 9360 }, { "epoch": 2.2, "learning_rate": 3.5250049104785875e-06, "loss": 0.0021, "step": 9361 }, { "epoch": 2.2, "learning_rate": 3.523073769193457e-06, "loss": 0.0002, "step": 9362 }, { "epoch": 2.2, "learning_rate": 3.5211430439200434e-06, "loss": 0.0604, "step": 9363 }, { "epoch": 2.2, "learning_rate": 3.519212734782352e-06, "loss": 0.0244, "step": 9364 }, { "epoch": 2.2, "learning_rate": 3.517282841904365e-06, "loss": 0.0055, "step": 9365 }, { "epoch": 2.2, "learning_rate": 3.5153533654100435e-06, "loss": 0.0208, "step": 9366 }, { "epoch": 2.2, "learning_rate": 3.513424305423312e-06, "loss": 0.0028, "step": 9367 }, { "epoch": 2.2, "learning_rate": 3.5114956620680806e-06, "loss": 0.0305, "step": 9368 }, { "epoch": 2.2, "learning_rate": 3.5095674354682207e-06, "loss": 0.0041, "step": 9369 }, { "epoch": 2.2, "learning_rate": 3.5076396257475842e-06, "loss": 0.0075, "step": 9370 }, { "epoch": 2.2, "learning_rate": 3.5057122330299887e-06, "loss": 0.0048, "step": 9371 }, { "epoch": 2.2, "learning_rate": 3.5037852574392383e-06, "loss": 0.0242, "step": 9372 }, { "epoch": 2.2, "learning_rate": 3.501858699099098e-06, "loss": 0.0151, "step": 9373 }, { "epoch": 2.2, "learning_rate": 3.4999325581333068e-06, "loss": 0.031, "step": 9374 }, { "epoch": 2.2, "learning_rate": 3.4980068346655873e-06, "loss": 0.0469, "step": 9375 }, { "epoch": 2.2, "learning_rate": 3.496081528819626e-06, "loss": 0.0083, "step": 9376 }, { "epoch": 2.2, "learning_rate": 3.494156640719084e-06, "loss": 0.0746, "step": 9377 }, { "epoch": 2.2, "learning_rate": 3.4922321704875926e-06, "loss": 0.0382, "step": 9378 }, { "epoch": 2.2, "learning_rate": 3.4903081182487675e-06, "loss": 0.0227, "step": 9379 }, { "epoch": 2.2, "learning_rate": 3.488384484126187e-06, "loss": 0.0018, "step": 9380 }, { "epoch": 2.2, "learning_rate": 3.486461268243403e-06, "loss": 0.007, "step": 9381 }, { "epoch": 2.2, "learning_rate": 3.4845384707239483e-06, "loss": 0.049, "step": 9382 }, { "epoch": 2.2, "learning_rate": 3.4826160916913186e-06, "loss": 0.0182, "step": 9383 }, { "epoch": 2.2, "learning_rate": 3.4806941312689935e-06, "loss": 0.016, "step": 9384 }, { "epoch": 2.2, "learning_rate": 3.478772589580418e-06, "loss": 0.0272, "step": 9385 }, { "epoch": 2.2, "learning_rate": 3.4768514667490118e-06, "loss": 0.0037, "step": 9386 }, { "epoch": 2.2, "learning_rate": 3.4749307628981645e-06, "loss": 0.0631, "step": 9387 }, { "epoch": 2.2, "learning_rate": 3.4730104781512485e-06, "loss": 0.003, "step": 9388 }, { "epoch": 2.2, "learning_rate": 3.4710906126316023e-06, "loss": 0.002, "step": 9389 }, { "epoch": 2.2, "learning_rate": 3.469171166462533e-06, "loss": 0.0004, "step": 9390 }, { "epoch": 2.2, "learning_rate": 3.4672521397673343e-06, "loss": 0.0093, "step": 9391 }, { "epoch": 2.2, "learning_rate": 3.4653335326692604e-06, "loss": 0.0053, "step": 9392 }, { "epoch": 2.2, "learning_rate": 3.4634153452915443e-06, "loss": 0.0036, "step": 9393 }, { "epoch": 2.2, "learning_rate": 3.4614975777573866e-06, "loss": 0.0109, "step": 9394 }, { "epoch": 2.2, "learning_rate": 3.459580230189973e-06, "loss": 0.0362, "step": 9395 }, { "epoch": 2.21, "learning_rate": 3.4576633027124496e-06, "loss": 0.0054, "step": 9396 }, { "epoch": 2.21, "learning_rate": 3.4557467954479396e-06, "loss": 0.0359, "step": 9397 }, { "epoch": 2.21, "learning_rate": 3.453830708519539e-06, "loss": 0.0117, "step": 9398 }, { "epoch": 2.21, "learning_rate": 3.45191504205032e-06, "loss": 0.0004, "step": 9399 }, { "epoch": 2.21, "learning_rate": 3.4499997961633314e-06, "loss": 0.0001, "step": 9400 }, { "epoch": 2.21, "learning_rate": 3.4480849709815756e-06, "loss": 0.0378, "step": 9401 }, { "epoch": 2.21, "learning_rate": 3.4461705666280532e-06, "loss": 0.005, "step": 9402 }, { "epoch": 2.21, "learning_rate": 3.444256583225718e-06, "loss": 0.016, "step": 9403 }, { "epoch": 2.21, "learning_rate": 3.4423430208975117e-06, "loss": 0.0289, "step": 9404 }, { "epoch": 2.21, "learning_rate": 3.4404298797663384e-06, "loss": 0.055, "step": 9405 }, { "epoch": 2.21, "learning_rate": 3.4385171599550795e-06, "loss": 0.0001, "step": 9406 }, { "epoch": 2.21, "learning_rate": 3.4366048615865843e-06, "loss": 0.025, "step": 9407 }, { "epoch": 2.21, "learning_rate": 3.434692984783686e-06, "loss": 0.0026, "step": 9408 }, { "epoch": 2.21, "learning_rate": 3.4327815296691825e-06, "loss": 0.0251, "step": 9409 }, { "epoch": 2.21, "learning_rate": 3.4308704963658413e-06, "loss": 0.0017, "step": 9410 }, { "epoch": 2.21, "learning_rate": 3.4289598849964155e-06, "loss": 0.0003, "step": 9411 }, { "epoch": 2.21, "learning_rate": 3.4270496956836186e-06, "loss": 0.0464, "step": 9412 }, { "epoch": 2.21, "learning_rate": 3.425139928550142e-06, "loss": 0.0019, "step": 9413 }, { "epoch": 2.21, "learning_rate": 3.4232305837186475e-06, "loss": 0.0034, "step": 9414 }, { "epoch": 2.21, "learning_rate": 3.421321661311774e-06, "loss": 0.0041, "step": 9415 }, { "epoch": 2.21, "learning_rate": 3.4194131614521377e-06, "loss": 0.0275, "step": 9416 }, { "epoch": 2.21, "learning_rate": 3.4175050842623093e-06, "loss": 0.0029, "step": 9417 }, { "epoch": 2.21, "learning_rate": 3.415597429864853e-06, "loss": 0.0139, "step": 9418 }, { "epoch": 2.21, "learning_rate": 3.4136901983822912e-06, "loss": 0.0261, "step": 9419 }, { "epoch": 2.21, "learning_rate": 3.4117833899371313e-06, "loss": 0.0345, "step": 9420 }, { "epoch": 2.21, "learning_rate": 3.4098770046518424e-06, "loss": 0.0384, "step": 9421 }, { "epoch": 2.21, "learning_rate": 3.4079710426488743e-06, "loss": 0.0464, "step": 9422 }, { "epoch": 2.21, "learning_rate": 3.4060655040506417e-06, "loss": 0.0285, "step": 9423 }, { "epoch": 2.21, "learning_rate": 3.4041603889795426e-06, "loss": 0.0273, "step": 9424 }, { "epoch": 2.21, "learning_rate": 3.4022556975579403e-06, "loss": 0.0053, "step": 9425 }, { "epoch": 2.21, "learning_rate": 3.4003514299081696e-06, "loss": 0.0059, "step": 9426 }, { "epoch": 2.21, "learning_rate": 3.398447586152547e-06, "loss": 0.0067, "step": 9427 }, { "epoch": 2.21, "learning_rate": 3.3965441664133524e-06, "loss": 0.0166, "step": 9428 }, { "epoch": 2.21, "learning_rate": 3.3946411708128426e-06, "loss": 0.0525, "step": 9429 }, { "epoch": 2.21, "learning_rate": 3.392738599473244e-06, "loss": 0.0057, "step": 9430 }, { "epoch": 2.21, "learning_rate": 3.3908364525167637e-06, "loss": 0.0232, "step": 9431 }, { "epoch": 2.21, "learning_rate": 3.3889347300655737e-06, "loss": 0.0224, "step": 9432 }, { "epoch": 2.21, "learning_rate": 3.3870334322418187e-06, "loss": 0.0035, "step": 9433 }, { "epoch": 2.21, "learning_rate": 3.385132559167623e-06, "loss": 0.0004, "step": 9434 }, { "epoch": 2.21, "learning_rate": 3.3832321109650757e-06, "loss": 0.0001, "step": 9435 }, { "epoch": 2.21, "learning_rate": 3.3813320877562494e-06, "loss": 0.0002, "step": 9436 }, { "epoch": 2.21, "learning_rate": 3.3794324896631703e-06, "loss": 0.0088, "step": 9437 }, { "epoch": 2.22, "learning_rate": 3.3775333168078594e-06, "loss": 0.0049, "step": 9438 }, { "epoch": 2.22, "learning_rate": 3.3756345693122937e-06, "loss": 0.001, "step": 9439 }, { "epoch": 2.22, "learning_rate": 3.3737362472984346e-06, "loss": 0.023, "step": 9440 }, { "epoch": 2.22, "learning_rate": 3.371838350888209e-06, "loss": 0.0019, "step": 9441 }, { "epoch": 2.22, "learning_rate": 3.369940880203515e-06, "loss": 0.0077, "step": 9442 }, { "epoch": 2.22, "learning_rate": 3.3680438353662313e-06, "loss": 0.0072, "step": 9443 }, { "epoch": 2.22, "learning_rate": 3.3661472164982057e-06, "loss": 0.0074, "step": 9444 }, { "epoch": 2.22, "learning_rate": 3.3642510237212532e-06, "loss": 0.0065, "step": 9445 }, { "epoch": 2.22, "learning_rate": 3.362355257157165e-06, "loss": 0.0032, "step": 9446 }, { "epoch": 2.22, "learning_rate": 3.3604599169277117e-06, "loss": 0.0137, "step": 9447 }, { "epoch": 2.22, "learning_rate": 3.358565003154629e-06, "loss": 0.0013, "step": 9448 }, { "epoch": 2.22, "learning_rate": 3.3566705159596234e-06, "loss": 0.005, "step": 9449 }, { "epoch": 2.22, "learning_rate": 3.354776455464378e-06, "loss": 0.006, "step": 9450 }, { "epoch": 2.22, "learning_rate": 3.3528828217905485e-06, "loss": 0.0135, "step": 9451 }, { "epoch": 2.22, "learning_rate": 3.35098961505977e-06, "loss": 0.0022, "step": 9452 }, { "epoch": 2.22, "learning_rate": 3.3490968353936292e-06, "loss": 0.0018, "step": 9453 }, { "epoch": 2.22, "learning_rate": 3.347204482913711e-06, "loss": 0.0221, "step": 9454 }, { "epoch": 2.22, "learning_rate": 3.345312557741551e-06, "loss": 0.0179, "step": 9455 }, { "epoch": 2.22, "learning_rate": 3.3434210599986793e-06, "loss": 0.0156, "step": 9456 }, { "epoch": 2.22, "learning_rate": 3.3415299898065722e-06, "loss": 0.0172, "step": 9457 }, { "epoch": 2.22, "learning_rate": 3.3396393472867006e-06, "loss": 0.0023, "step": 9458 }, { "epoch": 2.22, "learning_rate": 3.3377491325605026e-06, "loss": 0.0088, "step": 9459 }, { "epoch": 2.22, "learning_rate": 3.3358593457493827e-06, "loss": 0.0012, "step": 9460 }, { "epoch": 2.22, "learning_rate": 3.3339699869747223e-06, "loss": 0.0377, "step": 9461 }, { "epoch": 2.22, "learning_rate": 3.33208105635787e-06, "loss": 0.0002, "step": 9462 }, { "epoch": 2.22, "learning_rate": 3.3301925540201583e-06, "loss": 0.0122, "step": 9463 }, { "epoch": 2.22, "learning_rate": 3.3283044800828844e-06, "loss": 0.0213, "step": 9464 }, { "epoch": 2.22, "learning_rate": 3.3264168346673166e-06, "loss": 0.0003, "step": 9465 }, { "epoch": 2.22, "learning_rate": 3.324529617894694e-06, "loss": 0.0007, "step": 9466 }, { "epoch": 2.22, "learning_rate": 3.322642829886238e-06, "loss": 0.0018, "step": 9467 }, { "epoch": 2.22, "learning_rate": 3.3207564707631414e-06, "loss": 0.0008, "step": 9468 }, { "epoch": 2.22, "learning_rate": 3.3188705406465525e-06, "loss": 0.0119, "step": 9469 }, { "epoch": 2.22, "learning_rate": 3.316985039657613e-06, "loss": 0.0002, "step": 9470 }, { "epoch": 2.22, "learning_rate": 3.3150999679174225e-06, "loss": 0.0411, "step": 9471 }, { "epoch": 2.22, "learning_rate": 3.313215325547069e-06, "loss": 0.0107, "step": 9472 }, { "epoch": 2.22, "learning_rate": 3.3113311126675883e-06, "loss": 0.0099, "step": 9473 }, { "epoch": 2.22, "learning_rate": 3.309447329400014e-06, "loss": 0.0474, "step": 9474 }, { "epoch": 2.22, "learning_rate": 3.3075639758653333e-06, "loss": 0.002, "step": 9475 }, { "epoch": 2.22, "learning_rate": 3.3056810521845217e-06, "loss": 0.004, "step": 9476 }, { "epoch": 2.22, "learning_rate": 3.303798558478515e-06, "loss": 0.0388, "step": 9477 }, { "epoch": 2.22, "learning_rate": 3.3019164948682223e-06, "loss": 0.0782, "step": 9478 }, { "epoch": 2.22, "learning_rate": 3.300034861474535e-06, "loss": 0.0138, "step": 9479 }, { "epoch": 2.22, "learning_rate": 3.2981536584183062e-06, "loss": 0.0259, "step": 9480 }, { "epoch": 2.23, "learning_rate": 3.2962728858203653e-06, "loss": 0.0114, "step": 9481 }, { "epoch": 2.23, "learning_rate": 3.294392543801511e-06, "loss": 0.0222, "step": 9482 }, { "epoch": 2.23, "learning_rate": 3.292512632482524e-06, "loss": 0.0033, "step": 9483 }, { "epoch": 2.23, "learning_rate": 3.290633151984146e-06, "loss": 0.001, "step": 9484 }, { "epoch": 2.23, "learning_rate": 3.2887541024270943e-06, "loss": 0.0183, "step": 9485 }, { "epoch": 2.23, "learning_rate": 3.286875483932065e-06, "loss": 0.0041, "step": 9486 }, { "epoch": 2.23, "learning_rate": 3.2849972966197162e-06, "loss": 0.0032, "step": 9487 }, { "epoch": 2.23, "learning_rate": 3.283119540610692e-06, "loss": 0.0231, "step": 9488 }, { "epoch": 2.23, "learning_rate": 3.2812422160255874e-06, "loss": 0.0053, "step": 9489 }, { "epoch": 2.23, "learning_rate": 3.2793653229849944e-06, "loss": 0.0424, "step": 9490 }, { "epoch": 2.23, "learning_rate": 3.277488861609456e-06, "loss": 0.0356, "step": 9491 }, { "epoch": 2.23, "learning_rate": 3.275612832019508e-06, "loss": 0.0513, "step": 9492 }, { "epoch": 2.23, "learning_rate": 3.2737372343356345e-06, "loss": 0.0082, "step": 9493 }, { "epoch": 2.23, "learning_rate": 3.2718620686783108e-06, "loss": 0.0343, "step": 9494 }, { "epoch": 2.23, "learning_rate": 3.2699873351679822e-06, "loss": 0.0252, "step": 9495 }, { "epoch": 2.23, "learning_rate": 3.2681130339250598e-06, "loss": 0.0297, "step": 9496 }, { "epoch": 2.23, "learning_rate": 3.2662391650699288e-06, "loss": 0.0003, "step": 9497 }, { "epoch": 2.23, "learning_rate": 3.2643657287229445e-06, "loss": 0.0015, "step": 9498 }, { "epoch": 2.23, "learning_rate": 3.2624927250044434e-06, "loss": 0.0232, "step": 9499 }, { "epoch": 2.23, "learning_rate": 3.2606201540347248e-06, "loss": 0.0454, "step": 9500 }, { "epoch": 2.23, "learning_rate": 3.2587480159340614e-06, "loss": 0.0018, "step": 9501 }, { "epoch": 2.23, "learning_rate": 3.2568763108227052e-06, "loss": 0.0125, "step": 9502 }, { "epoch": 2.23, "learning_rate": 3.25500503882087e-06, "loss": 0.0001, "step": 9503 }, { "epoch": 2.23, "learning_rate": 3.2531342000487563e-06, "loss": 0.0106, "step": 9504 }, { "epoch": 2.23, "learning_rate": 3.2512637946265167e-06, "loss": 0.062, "step": 9505 }, { "epoch": 2.23, "learning_rate": 3.2493938226742937e-06, "loss": 0.0026, "step": 9506 }, { "epoch": 2.23, "learning_rate": 3.247524284312191e-06, "loss": 0.026, "step": 9507 }, { "epoch": 2.23, "learning_rate": 3.2456551796602984e-06, "loss": 0.0215, "step": 9508 }, { "epoch": 2.23, "learning_rate": 3.243786508838653e-06, "loss": 0.0004, "step": 9509 }, { "epoch": 2.23, "learning_rate": 3.241918271967288e-06, "loss": 0.0004, "step": 9510 }, { "epoch": 2.23, "learning_rate": 3.2400504691662026e-06, "loss": 0.034, "step": 9511 }, { "epoch": 2.23, "learning_rate": 3.238183100555361e-06, "loss": 0.0009, "step": 9512 }, { "epoch": 2.23, "learning_rate": 3.2363161662547047e-06, "loss": 0.0278, "step": 9513 }, { "epoch": 2.23, "learning_rate": 3.234449666384144e-06, "loss": 0.009, "step": 9514 }, { "epoch": 2.23, "learning_rate": 3.2325836010635693e-06, "loss": 0.0043, "step": 9515 }, { "epoch": 2.23, "learning_rate": 3.2307179704128345e-06, "loss": 0.0257, "step": 9516 }, { "epoch": 2.23, "learning_rate": 3.2288527745517684e-06, "loss": 0.0023, "step": 9517 }, { "epoch": 2.23, "learning_rate": 3.2269880136001697e-06, "loss": 0.0035, "step": 9518 }, { "epoch": 2.23, "learning_rate": 3.2251236876778147e-06, "loss": 0.0004, "step": 9519 }, { "epoch": 2.23, "learning_rate": 3.2232597969044554e-06, "loss": 0.051, "step": 9520 }, { "epoch": 2.23, "learning_rate": 3.2213963413997953e-06, "loss": 0.0227, "step": 9521 }, { "epoch": 2.23, "learning_rate": 3.2195333212835344e-06, "loss": 0.0045, "step": 9522 }, { "epoch": 2.23, "learning_rate": 3.217670736675327e-06, "loss": 0.0047, "step": 9523 }, { "epoch": 2.24, "learning_rate": 3.2158085876948166e-06, "loss": 0.0242, "step": 9524 }, { "epoch": 2.24, "learning_rate": 3.2139468744615964e-06, "loss": 0.0047, "step": 9525 }, { "epoch": 2.24, "learning_rate": 3.2120855970952513e-06, "loss": 0.013, "step": 9526 }, { "epoch": 2.24, "learning_rate": 3.2102247557153277e-06, "loss": 0.0063, "step": 9527 }, { "epoch": 2.24, "learning_rate": 3.208364350441351e-06, "loss": 0.0201, "step": 9528 }, { "epoch": 2.24, "learning_rate": 3.206504381392811e-06, "loss": 0.0217, "step": 9529 }, { "epoch": 2.24, "learning_rate": 3.2046448486891725e-06, "loss": 0.0001, "step": 9530 }, { "epoch": 2.24, "learning_rate": 3.2027857524498775e-06, "loss": 0.0158, "step": 9531 }, { "epoch": 2.24, "learning_rate": 3.2009270927943324e-06, "loss": 0.0157, "step": 9532 }, { "epoch": 2.24, "learning_rate": 3.199068869841919e-06, "loss": 0.0032, "step": 9533 }, { "epoch": 2.24, "learning_rate": 3.1972110837119864e-06, "loss": 0.0043, "step": 9534 }, { "epoch": 2.24, "learning_rate": 3.1953537345238683e-06, "loss": 0.0081, "step": 9535 }, { "epoch": 2.24, "learning_rate": 3.1934968223968567e-06, "loss": 0.0252, "step": 9536 }, { "epoch": 2.24, "learning_rate": 3.191640347450218e-06, "loss": 0.0327, "step": 9537 }, { "epoch": 2.24, "learning_rate": 3.1897843098032e-06, "loss": 0.0005, "step": 9538 }, { "epoch": 2.24, "learning_rate": 3.187928709575009e-06, "loss": 0.0009, "step": 9539 }, { "epoch": 2.24, "learning_rate": 3.1860735468848393e-06, "loss": 0.0065, "step": 9540 }, { "epoch": 2.24, "learning_rate": 3.1842188218518355e-06, "loss": 0.01, "step": 9541 }, { "epoch": 2.24, "learning_rate": 3.1823645345951348e-06, "loss": 0.0692, "step": 9542 }, { "epoch": 2.24, "learning_rate": 3.180510685233832e-06, "loss": 0.0038, "step": 9543 }, { "epoch": 2.24, "learning_rate": 3.1786572738870058e-06, "loss": 0.0709, "step": 9544 }, { "epoch": 2.24, "learning_rate": 3.176804300673697e-06, "loss": 0.0072, "step": 9545 }, { "epoch": 2.24, "learning_rate": 3.174951765712919e-06, "loss": 0.0008, "step": 9546 }, { "epoch": 2.24, "learning_rate": 3.173099669123666e-06, "loss": 0.0471, "step": 9547 }, { "epoch": 2.24, "learning_rate": 3.1712480110248943e-06, "loss": 0.0025, "step": 9548 }, { "epoch": 2.24, "learning_rate": 3.169396791535536e-06, "loss": 0.0181, "step": 9549 }, { "epoch": 2.24, "learning_rate": 3.1675460107744905e-06, "loss": 0.0017, "step": 9550 }, { "epoch": 2.24, "learning_rate": 3.165695668860641e-06, "loss": 0.0323, "step": 9551 }, { "epoch": 2.24, "learning_rate": 3.1638457659128296e-06, "loss": 0.0014, "step": 9552 }, { "epoch": 2.24, "learning_rate": 3.161996302049873e-06, "loss": 0.0508, "step": 9553 }, { "epoch": 2.24, "learning_rate": 3.1601472773905685e-06, "loss": 0.0039, "step": 9554 }, { "epoch": 2.24, "learning_rate": 3.1582986920536716e-06, "loss": 0.0242, "step": 9555 }, { "epoch": 2.24, "learning_rate": 3.1564505461579265e-06, "loss": 0.0021, "step": 9556 }, { "epoch": 2.24, "learning_rate": 3.1546028398220265e-06, "loss": 0.0269, "step": 9557 }, { "epoch": 2.24, "learning_rate": 3.1527555731646588e-06, "loss": 0.0497, "step": 9558 }, { "epoch": 2.24, "learning_rate": 3.1509087463044664e-06, "loss": 0.0044, "step": 9559 }, { "epoch": 2.24, "learning_rate": 3.149062359360081e-06, "loss": 0.0009, "step": 9560 }, { "epoch": 2.24, "learning_rate": 3.1472164124500828e-06, "loss": 0.0192, "step": 9561 }, { "epoch": 2.24, "learning_rate": 3.1453709056930413e-06, "loss": 0.0005, "step": 9562 }, { "epoch": 2.24, "learning_rate": 3.1435258392074984e-06, "loss": 0.0333, "step": 9563 }, { "epoch": 2.24, "learning_rate": 3.1416812131119577e-06, "loss": 0.0247, "step": 9564 }, { "epoch": 2.24, "learning_rate": 3.1398370275249014e-06, "loss": 0.0149, "step": 9565 }, { "epoch": 2.25, "learning_rate": 3.137993282564775e-06, "loss": 0.0052, "step": 9566 }, { "epoch": 2.25, "learning_rate": 3.1361499783500094e-06, "loss": 0.0115, "step": 9567 }, { "epoch": 2.25, "learning_rate": 3.134307114998998e-06, "loss": 0.0043, "step": 9568 }, { "epoch": 2.25, "learning_rate": 3.132464692630105e-06, "loss": 0.0114, "step": 9569 }, { "epoch": 2.25, "learning_rate": 3.130622711361667e-06, "loss": 0.0288, "step": 9570 }, { "epoch": 2.25, "learning_rate": 3.128781171312002e-06, "loss": 0.0368, "step": 9571 }, { "epoch": 2.25, "learning_rate": 3.126940072599385e-06, "loss": 0.0109, "step": 9572 }, { "epoch": 2.25, "learning_rate": 3.125099415342069e-06, "loss": 0.024, "step": 9573 }, { "epoch": 2.25, "learning_rate": 3.123259199658285e-06, "loss": 0.003, "step": 9574 }, { "epoch": 2.25, "learning_rate": 3.1214194256662223e-06, "loss": 0.0211, "step": 9575 }, { "epoch": 2.25, "learning_rate": 3.11958009348406e-06, "loss": 0.0039, "step": 9576 }, { "epoch": 2.25, "learning_rate": 3.117741203229925e-06, "loss": 0.003, "step": 9577 }, { "epoch": 2.25, "learning_rate": 3.1159027550219344e-06, "loss": 0.0575, "step": 9578 }, { "epoch": 2.25, "learning_rate": 3.114064748978175e-06, "loss": 0.0007, "step": 9579 }, { "epoch": 2.25, "learning_rate": 3.1122271852166984e-06, "loss": 0.0416, "step": 9580 }, { "epoch": 2.25, "learning_rate": 3.1103900638555316e-06, "loss": 0.0103, "step": 9581 }, { "epoch": 2.25, "learning_rate": 3.108553385012669e-06, "loss": 0.0096, "step": 9582 }, { "epoch": 2.25, "learning_rate": 3.1067171488060855e-06, "loss": 0.0183, "step": 9583 }, { "epoch": 2.25, "learning_rate": 3.1048813553537205e-06, "loss": 0.0066, "step": 9584 }, { "epoch": 2.25, "learning_rate": 3.1030460047734857e-06, "loss": 0.0101, "step": 9585 }, { "epoch": 2.25, "learning_rate": 3.101211097183263e-06, "loss": 0.0714, "step": 9586 }, { "epoch": 2.25, "learning_rate": 3.0993766327009133e-06, "loss": 0.0687, "step": 9587 }, { "epoch": 2.25, "learning_rate": 3.097542611444262e-06, "loss": 0.0072, "step": 9588 }, { "epoch": 2.25, "learning_rate": 3.0957090335311035e-06, "loss": 0.0028, "step": 9589 }, { "epoch": 2.25, "learning_rate": 3.093875899079215e-06, "loss": 0.0148, "step": 9590 }, { "epoch": 2.25, "learning_rate": 3.0920432082063324e-06, "loss": 0.0588, "step": 9591 }, { "epoch": 2.25, "learning_rate": 3.090210961030179e-06, "loss": 0.0115, "step": 9592 }, { "epoch": 2.25, "learning_rate": 3.088379157668425e-06, "loss": 0.0028, "step": 9593 }, { "epoch": 2.25, "learning_rate": 3.086547798238739e-06, "loss": 0.0441, "step": 9594 }, { "epoch": 2.25, "learning_rate": 3.0847168828587406e-06, "loss": 0.0097, "step": 9595 }, { "epoch": 2.25, "learning_rate": 3.0828864116460355e-06, "loss": 0.0242, "step": 9596 }, { "epoch": 2.25, "learning_rate": 3.081056384718193e-06, "loss": 0.0462, "step": 9597 }, { "epoch": 2.25, "learning_rate": 3.0792268021927497e-06, "loss": 0.061, "step": 9598 }, { "epoch": 2.25, "learning_rate": 3.077397664187228e-06, "loss": 0.0068, "step": 9599 }, { "epoch": 2.25, "learning_rate": 3.0755689708191085e-06, "loss": 0.04, "step": 9600 }, { "epoch": 2.25, "learning_rate": 3.073740722205849e-06, "loss": 0.0122, "step": 9601 }, { "epoch": 2.25, "learning_rate": 3.071912918464873e-06, "loss": 0.0718, "step": 9602 }, { "epoch": 2.25, "learning_rate": 3.070085559713587e-06, "loss": 0.0245, "step": 9603 }, { "epoch": 2.25, "learning_rate": 3.0682586460693585e-06, "loss": 0.0175, "step": 9604 }, { "epoch": 2.25, "learning_rate": 3.0664321776495274e-06, "loss": 0.0519, "step": 9605 }, { "epoch": 2.25, "learning_rate": 3.0646061545714134e-06, "loss": 0.0215, "step": 9606 }, { "epoch": 2.25, "learning_rate": 3.0627805769522967e-06, "loss": 0.0002, "step": 9607 }, { "epoch": 2.25, "learning_rate": 3.060955444909437e-06, "loss": 0.0037, "step": 9608 }, { "epoch": 2.26, "learning_rate": 3.059130758560056e-06, "loss": 0.0037, "step": 9609 }, { "epoch": 2.26, "learning_rate": 3.0573065180213614e-06, "loss": 0.0006, "step": 9610 }, { "epoch": 2.26, "learning_rate": 3.0554827234105165e-06, "loss": 0.0335, "step": 9611 }, { "epoch": 2.26, "learning_rate": 3.053659374844673e-06, "loss": 0.0009, "step": 9612 }, { "epoch": 2.26, "learning_rate": 3.0518364724409322e-06, "loss": 0.0079, "step": 9613 }, { "epoch": 2.26, "learning_rate": 3.0500140163163838e-06, "loss": 0.0081, "step": 9614 }, { "epoch": 2.26, "learning_rate": 3.0481920065880877e-06, "loss": 0.068, "step": 9615 }, { "epoch": 2.26, "learning_rate": 3.046370443373068e-06, "loss": 0.0007, "step": 9616 }, { "epoch": 2.26, "learning_rate": 3.0445493267883232e-06, "loss": 0.0038, "step": 9617 }, { "epoch": 2.26, "learning_rate": 3.0427286569508198e-06, "loss": 0.0036, "step": 9618 }, { "epoch": 2.26, "learning_rate": 3.0409084339775065e-06, "loss": 0.0011, "step": 9619 }, { "epoch": 2.26, "learning_rate": 3.039088657985291e-06, "loss": 0.0241, "step": 9620 }, { "epoch": 2.26, "learning_rate": 3.037269329091056e-06, "loss": 0.0484, "step": 9621 }, { "epoch": 2.26, "learning_rate": 3.0354504474116617e-06, "loss": 0.0657, "step": 9622 }, { "epoch": 2.26, "learning_rate": 3.033632013063932e-06, "loss": 0.1231, "step": 9623 }, { "epoch": 2.26, "learning_rate": 3.0318140261646634e-06, "loss": 0.0474, "step": 9624 }, { "epoch": 2.26, "learning_rate": 3.029996486830623e-06, "loss": 0.1177, "step": 9625 }, { "epoch": 2.26, "learning_rate": 3.0281793951785566e-06, "loss": 0.0048, "step": 9626 }, { "epoch": 2.26, "learning_rate": 3.026362751325169e-06, "loss": 0.0157, "step": 9627 }, { "epoch": 2.26, "learning_rate": 3.0245465553871546e-06, "loss": 0.0027, "step": 9628 }, { "epoch": 2.26, "learning_rate": 3.022730807481151e-06, "loss": 0.0007, "step": 9629 }, { "epoch": 2.26, "learning_rate": 3.020915507723793e-06, "loss": 0.0009, "step": 9630 }, { "epoch": 2.26, "learning_rate": 3.019100656231678e-06, "loss": 0.024, "step": 9631 }, { "epoch": 2.26, "learning_rate": 3.017286253121372e-06, "loss": 0.0162, "step": 9632 }, { "epoch": 2.26, "learning_rate": 3.015472298509412e-06, "loss": 0.0366, "step": 9633 }, { "epoch": 2.26, "learning_rate": 3.0136587925123063e-06, "loss": 0.0096, "step": 9634 }, { "epoch": 2.26, "learning_rate": 3.0118457352465413e-06, "loss": 0.0019, "step": 9635 }, { "epoch": 2.26, "learning_rate": 3.0100331268285664e-06, "loss": 0.0034, "step": 9636 }, { "epoch": 2.26, "learning_rate": 3.0082209673748063e-06, "loss": 0.0317, "step": 9637 }, { "epoch": 2.26, "learning_rate": 3.00640925700165e-06, "loss": 0.0118, "step": 9638 }, { "epoch": 2.26, "learning_rate": 3.004597995825472e-06, "loss": 0.0588, "step": 9639 }, { "epoch": 2.26, "learning_rate": 3.0027871839626056e-06, "loss": 0.0068, "step": 9640 }, { "epoch": 2.26, "learning_rate": 3.0009768215293543e-06, "loss": 0.011, "step": 9641 }, { "epoch": 2.26, "learning_rate": 2.999166908642006e-06, "loss": 0.0014, "step": 9642 }, { "epoch": 2.26, "learning_rate": 2.9973574454168054e-06, "loss": 0.0224, "step": 9643 }, { "epoch": 2.26, "learning_rate": 2.9955484319699755e-06, "loss": 0.0032, "step": 9644 }, { "epoch": 2.26, "learning_rate": 2.9937398684177054e-06, "loss": 0.0353, "step": 9645 }, { "epoch": 2.26, "learning_rate": 2.9919317548761653e-06, "loss": 0.0044, "step": 9646 }, { "epoch": 2.26, "learning_rate": 2.9901240914614837e-06, "loss": 0.0181, "step": 9647 }, { "epoch": 2.26, "learning_rate": 2.988316878289772e-06, "loss": 0.0096, "step": 9648 }, { "epoch": 2.26, "learning_rate": 2.986510115477105e-06, "loss": 0.003, "step": 9649 }, { "epoch": 2.26, "learning_rate": 2.9847038031395257e-06, "loss": 0.0008, "step": 9650 }, { "epoch": 2.27, "learning_rate": 2.9828979413930626e-06, "loss": 0.0011, "step": 9651 }, { "epoch": 2.27, "learning_rate": 2.9810925303536997e-06, "loss": 0.0022, "step": 9652 }, { "epoch": 2.27, "learning_rate": 2.9792875701374e-06, "loss": 0.0023, "step": 9653 }, { "epoch": 2.27, "learning_rate": 2.9774830608600923e-06, "loss": 0.0297, "step": 9654 }, { "epoch": 2.27, "learning_rate": 2.975679002637686e-06, "loss": 0.0195, "step": 9655 }, { "epoch": 2.27, "learning_rate": 2.973875395586051e-06, "loss": 0.0428, "step": 9656 }, { "epoch": 2.27, "learning_rate": 2.972072239821032e-06, "loss": 0.0087, "step": 9657 }, { "epoch": 2.27, "learning_rate": 2.9702695354584497e-06, "loss": 0.0515, "step": 9658 }, { "epoch": 2.27, "learning_rate": 2.9684672826140893e-06, "loss": 0.059, "step": 9659 }, { "epoch": 2.27, "learning_rate": 2.966665481403709e-06, "loss": 0.0035, "step": 9660 }, { "epoch": 2.27, "learning_rate": 2.9648641319430337e-06, "loss": 0.0009, "step": 9661 }, { "epoch": 2.27, "learning_rate": 2.9630632343477717e-06, "loss": 0.0316, "step": 9662 }, { "epoch": 2.27, "learning_rate": 2.9612627887335877e-06, "loss": 0.035, "step": 9663 }, { "epoch": 2.27, "learning_rate": 2.9594627952161302e-06, "loss": 0.0135, "step": 9664 }, { "epoch": 2.27, "learning_rate": 2.9576632539110086e-06, "loss": 0.0004, "step": 9665 }, { "epoch": 2.27, "learning_rate": 2.955864164933805e-06, "loss": 0.0049, "step": 9666 }, { "epoch": 2.27, "learning_rate": 2.9540655284000796e-06, "loss": 0.0288, "step": 9667 }, { "epoch": 2.27, "learning_rate": 2.9522673444253556e-06, "loss": 0.027, "step": 9668 }, { "epoch": 2.27, "learning_rate": 2.950469613125131e-06, "loss": 0.0004, "step": 9669 }, { "epoch": 2.27, "learning_rate": 2.9486723346148693e-06, "loss": 0.0052, "step": 9670 }, { "epoch": 2.27, "learning_rate": 2.9468755090100175e-06, "loss": 0.0341, "step": 9671 }, { "epoch": 2.27, "learning_rate": 2.94507913642598e-06, "loss": 0.0041, "step": 9672 }, { "epoch": 2.27, "learning_rate": 2.9432832169781354e-06, "loss": 0.0022, "step": 9673 }, { "epoch": 2.27, "learning_rate": 2.941487750781842e-06, "loss": 0.0676, "step": 9674 }, { "epoch": 2.27, "learning_rate": 2.9396927379524176e-06, "loss": 0.0006, "step": 9675 }, { "epoch": 2.27, "learning_rate": 2.9378981786051574e-06, "loss": 0.0184, "step": 9676 }, { "epoch": 2.27, "learning_rate": 2.9361040728553213e-06, "loss": 0.0188, "step": 9677 }, { "epoch": 2.27, "learning_rate": 2.9343104208181518e-06, "loss": 0.0245, "step": 9678 }, { "epoch": 2.27, "learning_rate": 2.9325172226088493e-06, "loss": 0.0112, "step": 9679 }, { "epoch": 2.27, "learning_rate": 2.9307244783425924e-06, "loss": 0.0108, "step": 9680 }, { "epoch": 2.27, "learning_rate": 2.9289321881345257e-06, "loss": 0.0595, "step": 9681 }, { "epoch": 2.27, "learning_rate": 2.92714035209977e-06, "loss": 0.0035, "step": 9682 }, { "epoch": 2.27, "learning_rate": 2.925348970353418e-06, "loss": 0.0007, "step": 9683 }, { "epoch": 2.27, "learning_rate": 2.923558043010527e-06, "loss": 0.0069, "step": 9684 }, { "epoch": 2.27, "learning_rate": 2.921767570186127e-06, "loss": 0.0195, "step": 9685 }, { "epoch": 2.27, "learning_rate": 2.919977551995218e-06, "loss": 0.0101, "step": 9686 }, { "epoch": 2.27, "learning_rate": 2.9181879885527774e-06, "loss": 0.005, "step": 9687 }, { "epoch": 2.27, "learning_rate": 2.9163988799737465e-06, "loss": 0.0089, "step": 9688 }, { "epoch": 2.27, "learning_rate": 2.914610226373038e-06, "loss": 0.0027, "step": 9689 }, { "epoch": 2.27, "learning_rate": 2.9128220278655362e-06, "loss": 0.0215, "step": 9690 }, { "epoch": 2.27, "learning_rate": 2.9110342845661e-06, "loss": 0.0234, "step": 9691 }, { "epoch": 2.27, "learning_rate": 2.9092469965895542e-06, "loss": 0.0028, "step": 9692 }, { "epoch": 2.27, "learning_rate": 2.907460164050693e-06, "loss": 0.0194, "step": 9693 }, { "epoch": 2.28, "learning_rate": 2.9056737870642892e-06, "loss": 0.022, "step": 9694 }, { "epoch": 2.28, "learning_rate": 2.9038878657450796e-06, "loss": 0.0518, "step": 9695 }, { "epoch": 2.28, "learning_rate": 2.9021024002077737e-06, "loss": 0.026, "step": 9696 }, { "epoch": 2.28, "learning_rate": 2.9003173905670477e-06, "loss": 0.0059, "step": 9697 }, { "epoch": 2.28, "learning_rate": 2.89853283693756e-06, "loss": 0.0602, "step": 9698 }, { "epoch": 2.28, "learning_rate": 2.8967487394339265e-06, "loss": 0.0107, "step": 9699 }, { "epoch": 2.28, "learning_rate": 2.894965098170739e-06, "loss": 0.0282, "step": 9700 }, { "epoch": 2.28, "learning_rate": 2.893181913262565e-06, "loss": 0.0216, "step": 9701 }, { "epoch": 2.28, "learning_rate": 2.891399184823933e-06, "loss": 0.0032, "step": 9702 }, { "epoch": 2.28, "learning_rate": 2.889616912969352e-06, "loss": 0.0483, "step": 9703 }, { "epoch": 2.28, "learning_rate": 2.887835097813295e-06, "loss": 0.0238, "step": 9704 }, { "epoch": 2.28, "learning_rate": 2.886053739470208e-06, "loss": 0.0174, "step": 9705 }, { "epoch": 2.28, "learning_rate": 2.8842728380545036e-06, "loss": 0.0006, "step": 9706 }, { "epoch": 2.28, "learning_rate": 2.882492393680575e-06, "loss": 0.0086, "step": 9707 }, { "epoch": 2.28, "learning_rate": 2.8807124064627766e-06, "loss": 0.0131, "step": 9708 }, { "epoch": 2.28, "learning_rate": 2.8789328765154334e-06, "loss": 0.0655, "step": 9709 }, { "epoch": 2.28, "learning_rate": 2.877153803952851e-06, "loss": 0.0575, "step": 9710 }, { "epoch": 2.28, "learning_rate": 2.8753751888892955e-06, "loss": 0.0179, "step": 9711 }, { "epoch": 2.28, "learning_rate": 2.873597031439006e-06, "loss": 0.0121, "step": 9712 }, { "epoch": 2.28, "learning_rate": 2.8718193317161923e-06, "loss": 0.0209, "step": 9713 }, { "epoch": 2.28, "learning_rate": 2.87004208983504e-06, "loss": 0.0005, "step": 9714 }, { "epoch": 2.28, "learning_rate": 2.868265305909699e-06, "loss": 0.0277, "step": 9715 }, { "epoch": 2.28, "learning_rate": 2.8664889800542883e-06, "loss": 0.0231, "step": 9716 }, { "epoch": 2.28, "learning_rate": 2.8647131123829063e-06, "loss": 0.0104, "step": 9717 }, { "epoch": 2.28, "learning_rate": 2.8629377030096115e-06, "loss": 0.0258, "step": 9718 }, { "epoch": 2.28, "learning_rate": 2.8611627520484454e-06, "loss": 0.0134, "step": 9719 }, { "epoch": 2.28, "learning_rate": 2.8593882596134073e-06, "loss": 0.0005, "step": 9720 }, { "epoch": 2.28, "learning_rate": 2.8576142258184738e-06, "loss": 0.011, "step": 9721 }, { "epoch": 2.28, "learning_rate": 2.855840650777587e-06, "loss": 0.0133, "step": 9722 }, { "epoch": 2.28, "learning_rate": 2.85406753460467e-06, "loss": 0.0554, "step": 9723 }, { "epoch": 2.28, "learning_rate": 2.852294877413606e-06, "loss": 0.021, "step": 9724 }, { "epoch": 2.28, "learning_rate": 2.8505226793182505e-06, "loss": 0.0101, "step": 9725 }, { "epoch": 2.28, "learning_rate": 2.8487509404324375e-06, "loss": 0.0373, "step": 9726 }, { "epoch": 2.28, "learning_rate": 2.8469796608699608e-06, "loss": 0.0132, "step": 9727 }, { "epoch": 2.28, "learning_rate": 2.845208840744591e-06, "loss": 0.0054, "step": 9728 }, { "epoch": 2.28, "learning_rate": 2.843438480170063e-06, "loss": 0.0362, "step": 9729 }, { "epoch": 2.28, "learning_rate": 2.841668579260094e-06, "loss": 0.0009, "step": 9730 }, { "epoch": 2.28, "learning_rate": 2.8398991381283612e-06, "loss": 0.0333, "step": 9731 }, { "epoch": 2.28, "learning_rate": 2.8381301568885144e-06, "loss": 0.0472, "step": 9732 }, { "epoch": 2.28, "learning_rate": 2.8363616356541736e-06, "loss": 0.0312, "step": 9733 }, { "epoch": 2.28, "learning_rate": 2.834593574538932e-06, "loss": 0.0359, "step": 9734 }, { "epoch": 2.28, "learning_rate": 2.832825973656359e-06, "loss": 0.0207, "step": 9735 }, { "epoch": 2.28, "learning_rate": 2.831058833119975e-06, "loss": 0.0149, "step": 9736 }, { "epoch": 2.29, "learning_rate": 2.8292921530432914e-06, "loss": 0.0181, "step": 9737 }, { "epoch": 2.29, "learning_rate": 2.8275259335397764e-06, "loss": 0.0002, "step": 9738 }, { "epoch": 2.29, "learning_rate": 2.8257601747228802e-06, "loss": 0.0031, "step": 9739 }, { "epoch": 2.29, "learning_rate": 2.8239948767060134e-06, "loss": 0.0019, "step": 9740 }, { "epoch": 2.29, "learning_rate": 2.8222300396025603e-06, "loss": 0.0022, "step": 9741 }, { "epoch": 2.29, "learning_rate": 2.820465663525874e-06, "loss": 0.0042, "step": 9742 }, { "epoch": 2.29, "learning_rate": 2.8187017485892865e-06, "loss": 0.0186, "step": 9743 }, { "epoch": 2.29, "learning_rate": 2.8169382949060896e-06, "loss": 0.0006, "step": 9744 }, { "epoch": 2.29, "learning_rate": 2.8151753025895478e-06, "loss": 0.0155, "step": 9745 }, { "epoch": 2.29, "learning_rate": 2.8134127717529025e-06, "loss": 0.0017, "step": 9746 }, { "epoch": 2.29, "learning_rate": 2.8116507025093576e-06, "loss": 0.0446, "step": 9747 }, { "epoch": 2.29, "learning_rate": 2.8098890949720915e-06, "loss": 0.0295, "step": 9748 }, { "epoch": 2.29, "learning_rate": 2.808127949254248e-06, "loss": 0.0007, "step": 9749 }, { "epoch": 2.29, "learning_rate": 2.806367265468949e-06, "loss": 0.0132, "step": 9750 }, { "epoch": 2.29, "learning_rate": 2.8046070437292895e-06, "loss": 0.005, "step": 9751 }, { "epoch": 2.29, "learning_rate": 2.8028472841483134e-06, "loss": 0.0373, "step": 9752 }, { "epoch": 2.29, "learning_rate": 2.8010879868390615e-06, "loss": 0.0117, "step": 9753 }, { "epoch": 2.29, "learning_rate": 2.7993291519145262e-06, "loss": 0.0136, "step": 9754 }, { "epoch": 2.29, "learning_rate": 2.7975707794876827e-06, "loss": 0.0004, "step": 9755 }, { "epoch": 2.29, "learning_rate": 2.795812869671468e-06, "loss": 0.0085, "step": 9756 }, { "epoch": 2.29, "learning_rate": 2.7940554225787932e-06, "loss": 0.0199, "step": 9757 }, { "epoch": 2.29, "learning_rate": 2.792298438322535e-06, "loss": 0.0213, "step": 9758 }, { "epoch": 2.29, "learning_rate": 2.79054191701555e-06, "loss": 0.0168, "step": 9759 }, { "epoch": 2.29, "learning_rate": 2.788785858770656e-06, "loss": 0.0091, "step": 9760 }, { "epoch": 2.29, "learning_rate": 2.787030263700643e-06, "loss": 0.0086, "step": 9761 }, { "epoch": 2.29, "learning_rate": 2.785275131918277e-06, "loss": 0.0007, "step": 9762 }, { "epoch": 2.29, "learning_rate": 2.7835204635362865e-06, "loss": 0.0207, "step": 9763 }, { "epoch": 2.29, "learning_rate": 2.7817662586673743e-06, "loss": 0.0333, "step": 9764 }, { "epoch": 2.29, "learning_rate": 2.78001251742421e-06, "loss": 0.0033, "step": 9765 }, { "epoch": 2.29, "learning_rate": 2.7782592399194418e-06, "loss": 0.0244, "step": 9766 }, { "epoch": 2.29, "learning_rate": 2.776506426265678e-06, "loss": 0.0283, "step": 9767 }, { "epoch": 2.29, "learning_rate": 2.774754076575501e-06, "loss": 0.033, "step": 9768 }, { "epoch": 2.29, "learning_rate": 2.7730021909614667e-06, "loss": 0.0062, "step": 9769 }, { "epoch": 2.29, "learning_rate": 2.7712507695360957e-06, "loss": 0.0005, "step": 9770 }, { "epoch": 2.29, "learning_rate": 2.769499812411889e-06, "loss": 0.0015, "step": 9771 }, { "epoch": 2.29, "learning_rate": 2.767749319701297e-06, "loss": 0.0162, "step": 9772 }, { "epoch": 2.29, "learning_rate": 2.7659992915167633e-06, "loss": 0.0014, "step": 9773 }, { "epoch": 2.29, "learning_rate": 2.764249727970687e-06, "loss": 0.0023, "step": 9774 }, { "epoch": 2.29, "learning_rate": 2.7625006291754475e-06, "loss": 0.0247, "step": 9775 }, { "epoch": 2.29, "learning_rate": 2.760751995243386e-06, "loss": 0.0024, "step": 9776 }, { "epoch": 2.29, "learning_rate": 2.7590038262868137e-06, "loss": 0.0039, "step": 9777 }, { "epoch": 2.29, "learning_rate": 2.7572561224180215e-06, "loss": 0.0117, "step": 9778 }, { "epoch": 2.3, "learning_rate": 2.7555088837492615e-06, "loss": 0.0429, "step": 9779 }, { "epoch": 2.3, "learning_rate": 2.753762110392757e-06, "loss": 0.0005, "step": 9780 }, { "epoch": 2.3, "learning_rate": 2.7520158024607012e-06, "loss": 0.0037, "step": 9781 }, { "epoch": 2.3, "learning_rate": 2.7502699600652648e-06, "loss": 0.0037, "step": 9782 }, { "epoch": 2.3, "learning_rate": 2.7485245833185805e-06, "loss": 0.0069, "step": 9783 }, { "epoch": 2.3, "learning_rate": 2.7467796723327523e-06, "loss": 0.0409, "step": 9784 }, { "epoch": 2.3, "learning_rate": 2.7450352272198533e-06, "loss": 0.0071, "step": 9785 }, { "epoch": 2.3, "learning_rate": 2.7432912480919315e-06, "loss": 0.0093, "step": 9786 }, { "epoch": 2.3, "learning_rate": 2.741547735061009e-06, "loss": 0.0023, "step": 9787 }, { "epoch": 2.3, "learning_rate": 2.739804688239058e-06, "loss": 0.0342, "step": 9788 }, { "epoch": 2.3, "learning_rate": 2.7380621077380443e-06, "loss": 0.0154, "step": 9789 }, { "epoch": 2.3, "learning_rate": 2.736319993669887e-06, "loss": 0.0006, "step": 9790 }, { "epoch": 2.3, "learning_rate": 2.734578346146488e-06, "loss": 0.0319, "step": 9791 }, { "epoch": 2.3, "learning_rate": 2.7328371652797093e-06, "loss": 0.0003, "step": 9792 }, { "epoch": 2.3, "learning_rate": 2.7310964511813853e-06, "loss": 0.02, "step": 9793 }, { "epoch": 2.3, "learning_rate": 2.7293562039633248e-06, "loss": 0.0043, "step": 9794 }, { "epoch": 2.3, "learning_rate": 2.7276164237373035e-06, "loss": 0.0115, "step": 9795 }, { "epoch": 2.3, "learning_rate": 2.725877110615066e-06, "loss": 0.0026, "step": 9796 }, { "epoch": 2.3, "learning_rate": 2.7241382647083247e-06, "loss": 0.0337, "step": 9797 }, { "epoch": 2.3, "learning_rate": 2.7223998861287716e-06, "loss": 0.022, "step": 9798 }, { "epoch": 2.3, "learning_rate": 2.72066197498806e-06, "loss": 0.0009, "step": 9799 }, { "epoch": 2.3, "learning_rate": 2.718924531397814e-06, "loss": 0.0148, "step": 9800 }, { "epoch": 2.3, "learning_rate": 2.717187555469628e-06, "loss": 0.0013, "step": 9801 }, { "epoch": 2.3, "learning_rate": 2.7154510473150696e-06, "loss": 0.0028, "step": 9802 }, { "epoch": 2.3, "learning_rate": 2.7137150070456808e-06, "loss": 0.0509, "step": 9803 }, { "epoch": 2.3, "learning_rate": 2.7119794347729554e-06, "loss": 0.0011, "step": 9804 }, { "epoch": 2.3, "learning_rate": 2.710244330608377e-06, "loss": 0.0038, "step": 9805 }, { "epoch": 2.3, "learning_rate": 2.708509694663386e-06, "loss": 0.0119, "step": 9806 }, { "epoch": 2.3, "learning_rate": 2.7067755270494057e-06, "loss": 0.0009, "step": 9807 }, { "epoch": 2.3, "learning_rate": 2.70504182787781e-06, "loss": 0.0002, "step": 9808 }, { "epoch": 2.3, "learning_rate": 2.703308597259964e-06, "loss": 0.0008, "step": 9809 }, { "epoch": 2.3, "learning_rate": 2.7015758353071843e-06, "loss": 0.005, "step": 9810 }, { "epoch": 2.3, "learning_rate": 2.6998435421307736e-06, "loss": 0.013, "step": 9811 }, { "epoch": 2.3, "learning_rate": 2.6981117178419936e-06, "loss": 0.0147, "step": 9812 }, { "epoch": 2.3, "learning_rate": 2.6963803625520756e-06, "loss": 0.001, "step": 9813 }, { "epoch": 2.3, "learning_rate": 2.6946494763722307e-06, "loss": 0.0121, "step": 9814 }, { "epoch": 2.3, "learning_rate": 2.692919059413631e-06, "loss": 0.0006, "step": 9815 }, { "epoch": 2.3, "learning_rate": 2.691189111787419e-06, "loss": 0.0082, "step": 9816 }, { "epoch": 2.3, "learning_rate": 2.6894596336047072e-06, "loss": 0.0252, "step": 9817 }, { "epoch": 2.3, "learning_rate": 2.6877306249765856e-06, "loss": 0.0053, "step": 9818 }, { "epoch": 2.3, "learning_rate": 2.6860020860141043e-06, "loss": 0.0036, "step": 9819 }, { "epoch": 2.3, "learning_rate": 2.6842740168282843e-06, "loss": 0.0008, "step": 9820 }, { "epoch": 2.3, "learning_rate": 2.6825464175301264e-06, "loss": 0.0262, "step": 9821 }, { "epoch": 2.31, "learning_rate": 2.680819288230586e-06, "loss": 0.0005, "step": 9822 }, { "epoch": 2.31, "learning_rate": 2.6790926290406073e-06, "loss": 0.0409, "step": 9823 }, { "epoch": 2.31, "learning_rate": 2.67736644007108e-06, "loss": 0.0061, "step": 9824 }, { "epoch": 2.31, "learning_rate": 2.675640721432885e-06, "loss": 0.0025, "step": 9825 }, { "epoch": 2.31, "learning_rate": 2.673915473236861e-06, "loss": 0.0116, "step": 9826 }, { "epoch": 2.31, "learning_rate": 2.672190695593826e-06, "loss": 0.0037, "step": 9827 }, { "epoch": 2.31, "learning_rate": 2.6704663886145577e-06, "loss": 0.0074, "step": 9828 }, { "epoch": 2.31, "learning_rate": 2.6687425524098053e-06, "loss": 0.0434, "step": 9829 }, { "epoch": 2.31, "learning_rate": 2.6670191870902974e-06, "loss": 0.0023, "step": 9830 }, { "epoch": 2.31, "learning_rate": 2.6652962927667213e-06, "loss": 0.0023, "step": 9831 }, { "epoch": 2.31, "learning_rate": 2.6635738695497402e-06, "loss": 0.0305, "step": 9832 }, { "epoch": 2.31, "learning_rate": 2.66185191754998e-06, "loss": 0.0008, "step": 9833 }, { "epoch": 2.31, "learning_rate": 2.660130436878048e-06, "loss": 0.0004, "step": 9834 }, { "epoch": 2.31, "learning_rate": 2.6584094276445114e-06, "loss": 0.0046, "step": 9835 }, { "epoch": 2.31, "learning_rate": 2.6566888899599074e-06, "loss": 0.0158, "step": 9836 }, { "epoch": 2.31, "learning_rate": 2.6549688239347526e-06, "loss": 0.0065, "step": 9837 }, { "epoch": 2.31, "learning_rate": 2.6532492296795187e-06, "loss": 0.0193, "step": 9838 }, { "epoch": 2.31, "learning_rate": 2.6515301073046663e-06, "loss": 0.001, "step": 9839 }, { "epoch": 2.31, "learning_rate": 2.6498114569206e-06, "loss": 0.0155, "step": 9840 }, { "epoch": 2.31, "learning_rate": 2.6480932786377176e-06, "loss": 0.0175, "step": 9841 }, { "epoch": 2.31, "learning_rate": 2.646375572566373e-06, "loss": 0.0281, "step": 9842 }, { "epoch": 2.31, "learning_rate": 2.6446583388169024e-06, "loss": 0.0069, "step": 9843 }, { "epoch": 2.31, "learning_rate": 2.6429415774995916e-06, "loss": 0.0207, "step": 9844 }, { "epoch": 2.31, "learning_rate": 2.6412252887247125e-06, "loss": 0.0004, "step": 9845 }, { "epoch": 2.31, "learning_rate": 2.639509472602506e-06, "loss": 0.0069, "step": 9846 }, { "epoch": 2.31, "learning_rate": 2.6377941292431765e-06, "loss": 0.0089, "step": 9847 }, { "epoch": 2.31, "learning_rate": 2.6360792587568985e-06, "loss": 0.01, "step": 9848 }, { "epoch": 2.31, "learning_rate": 2.634364861253815e-06, "loss": 0.0012, "step": 9849 }, { "epoch": 2.31, "learning_rate": 2.632650936844049e-06, "loss": 0.0472, "step": 9850 }, { "epoch": 2.31, "learning_rate": 2.630937485637681e-06, "loss": 0.0038, "step": 9851 }, { "epoch": 2.31, "learning_rate": 2.6292245077447675e-06, "loss": 0.0072, "step": 9852 }, { "epoch": 2.31, "learning_rate": 2.6275120032753266e-06, "loss": 0.0217, "step": 9853 }, { "epoch": 2.31, "learning_rate": 2.625799972339358e-06, "loss": 0.0252, "step": 9854 }, { "epoch": 2.31, "learning_rate": 2.6240884150468295e-06, "loss": 0.008, "step": 9855 }, { "epoch": 2.31, "learning_rate": 2.622377331507663e-06, "loss": 0.0044, "step": 9856 }, { "epoch": 2.31, "learning_rate": 2.6206667218317695e-06, "loss": 0.0602, "step": 9857 }, { "epoch": 2.31, "learning_rate": 2.6189565861290147e-06, "loss": 0.041, "step": 9858 }, { "epoch": 2.31, "learning_rate": 2.6172469245092503e-06, "loss": 0.0067, "step": 9859 }, { "epoch": 2.31, "learning_rate": 2.6155377370822755e-06, "loss": 0.0081, "step": 9860 }, { "epoch": 2.31, "learning_rate": 2.613829023957879e-06, "loss": 0.0076, "step": 9861 }, { "epoch": 2.31, "learning_rate": 2.6121207852458063e-06, "loss": 0.001, "step": 9862 }, { "epoch": 2.31, "learning_rate": 2.610413021055783e-06, "loss": 0.0104, "step": 9863 }, { "epoch": 2.32, "learning_rate": 2.608705731497495e-06, "loss": 0.0139, "step": 9864 }, { "epoch": 2.32, "learning_rate": 2.6069989166805987e-06, "loss": 0.0681, "step": 9865 }, { "epoch": 2.32, "learning_rate": 2.6052925767147273e-06, "loss": 0.0011, "step": 9866 }, { "epoch": 2.32, "learning_rate": 2.603586711709477e-06, "loss": 0.0014, "step": 9867 }, { "epoch": 2.32, "learning_rate": 2.6018813217744155e-06, "loss": 0.0309, "step": 9868 }, { "epoch": 2.32, "learning_rate": 2.600176407019075e-06, "loss": 0.0015, "step": 9869 }, { "epoch": 2.32, "learning_rate": 2.598471967552966e-06, "loss": 0.0178, "step": 9870 }, { "epoch": 2.32, "learning_rate": 2.596768003485569e-06, "loss": 0.0475, "step": 9871 }, { "epoch": 2.32, "learning_rate": 2.5950645149263196e-06, "loss": 0.0163, "step": 9872 }, { "epoch": 2.32, "learning_rate": 2.5933615019846402e-06, "loss": 0.014, "step": 9873 }, { "epoch": 2.32, "learning_rate": 2.591658964769909e-06, "loss": 0.0241, "step": 9874 }, { "epoch": 2.32, "learning_rate": 2.5899569033914883e-06, "loss": 0.087, "step": 9875 }, { "epoch": 2.32, "learning_rate": 2.58825531795869e-06, "loss": 0.0144, "step": 9876 }, { "epoch": 2.32, "learning_rate": 2.586554208580816e-06, "loss": 0.0116, "step": 9877 }, { "epoch": 2.32, "learning_rate": 2.58485357536712e-06, "loss": 0.0091, "step": 9878 }, { "epoch": 2.32, "learning_rate": 2.5831534184268414e-06, "loss": 0.0021, "step": 9879 }, { "epoch": 2.32, "learning_rate": 2.5814537378691773e-06, "loss": 0.007, "step": 9880 }, { "epoch": 2.32, "learning_rate": 2.5797545338032947e-06, "loss": 0.0059, "step": 9881 }, { "epoch": 2.32, "learning_rate": 2.5780558063383388e-06, "loss": 0.0314, "step": 9882 }, { "epoch": 2.32, "learning_rate": 2.5763575555834163e-06, "loss": 0.017, "step": 9883 }, { "epoch": 2.32, "learning_rate": 2.574659781647606e-06, "loss": 0.0292, "step": 9884 }, { "epoch": 2.32, "learning_rate": 2.5729624846399516e-06, "loss": 0.0024, "step": 9885 }, { "epoch": 2.32, "learning_rate": 2.5712656646694756e-06, "loss": 0.0153, "step": 9886 }, { "epoch": 2.32, "learning_rate": 2.5695693218451635e-06, "loss": 0.0016, "step": 9887 }, { "epoch": 2.32, "learning_rate": 2.5678734562759653e-06, "loss": 0.0322, "step": 9888 }, { "epoch": 2.32, "learning_rate": 2.5661780680708147e-06, "loss": 0.0493, "step": 9889 }, { "epoch": 2.32, "learning_rate": 2.564483157338599e-06, "loss": 0.0078, "step": 9890 }, { "epoch": 2.32, "learning_rate": 2.56278872418819e-06, "loss": 0.0178, "step": 9891 }, { "epoch": 2.32, "learning_rate": 2.5610947687284106e-06, "loss": 0.0012, "step": 9892 }, { "epoch": 2.32, "learning_rate": 2.559401291068071e-06, "loss": 0.0034, "step": 9893 }, { "epoch": 2.32, "learning_rate": 2.557708291315938e-06, "loss": 0.0102, "step": 9894 }, { "epoch": 2.32, "learning_rate": 2.5560157695807606e-06, "loss": 0.0549, "step": 9895 }, { "epoch": 2.32, "learning_rate": 2.5543237259712383e-06, "loss": 0.0504, "step": 9896 }, { "epoch": 2.32, "learning_rate": 2.552632160596056e-06, "loss": 0.0002, "step": 9897 }, { "epoch": 2.32, "learning_rate": 2.550941073563865e-06, "loss": 0.0034, "step": 9898 }, { "epoch": 2.32, "learning_rate": 2.5492504649832818e-06, "loss": 0.0426, "step": 9899 }, { "epoch": 2.32, "learning_rate": 2.547560334962893e-06, "loss": 0.0013, "step": 9900 }, { "epoch": 2.32, "learning_rate": 2.5458706836112525e-06, "loss": 0.0015, "step": 9901 }, { "epoch": 2.32, "learning_rate": 2.5441815110368918e-06, "loss": 0.0411, "step": 9902 }, { "epoch": 2.32, "learning_rate": 2.5424928173483045e-06, "loss": 0.002, "step": 9903 }, { "epoch": 2.32, "learning_rate": 2.5408046026539533e-06, "loss": 0.0053, "step": 9904 }, { "epoch": 2.32, "learning_rate": 2.5391168670622703e-06, "loss": 0.0454, "step": 9905 }, { "epoch": 2.32, "learning_rate": 2.537429610681661e-06, "loss": 0.0103, "step": 9906 }, { "epoch": 2.33, "learning_rate": 2.5357428336205024e-06, "loss": 0.0187, "step": 9907 }, { "epoch": 2.33, "learning_rate": 2.5340565359871263e-06, "loss": 0.0392, "step": 9908 }, { "epoch": 2.33, "learning_rate": 2.5323707178898505e-06, "loss": 0.0036, "step": 9909 }, { "epoch": 2.33, "learning_rate": 2.530685379436949e-06, "loss": 0.0173, "step": 9910 }, { "epoch": 2.33, "learning_rate": 2.5290005207366807e-06, "loss": 0.0454, "step": 9911 }, { "epoch": 2.33, "learning_rate": 2.52731614189725e-06, "loss": 0.0582, "step": 9912 }, { "epoch": 2.33, "learning_rate": 2.525632243026852e-06, "loss": 0.0238, "step": 9913 }, { "epoch": 2.33, "learning_rate": 2.5239488242336453e-06, "loss": 0.0026, "step": 9914 }, { "epoch": 2.33, "learning_rate": 2.522265885625752e-06, "loss": 0.016, "step": 9915 }, { "epoch": 2.33, "learning_rate": 2.5205834273112683e-06, "loss": 0.0006, "step": 9916 }, { "epoch": 2.33, "learning_rate": 2.5189014493982543e-06, "loss": 0.0083, "step": 9917 }, { "epoch": 2.33, "learning_rate": 2.5172199519947494e-06, "loss": 0.0043, "step": 9918 }, { "epoch": 2.33, "learning_rate": 2.5155389352087523e-06, "loss": 0.0434, "step": 9919 }, { "epoch": 2.33, "learning_rate": 2.5138583991482347e-06, "loss": 0.0059, "step": 9920 }, { "epoch": 2.33, "learning_rate": 2.5121783439211345e-06, "loss": 0.0271, "step": 9921 }, { "epoch": 2.33, "learning_rate": 2.510498769635368e-06, "loss": 0.0209, "step": 9922 }, { "epoch": 2.33, "learning_rate": 2.5088196763988083e-06, "loss": 0.0398, "step": 9923 }, { "epoch": 2.33, "learning_rate": 2.507141064319303e-06, "loss": 0.0365, "step": 9924 }, { "epoch": 2.33, "learning_rate": 2.5054629335046733e-06, "loss": 0.0133, "step": 9925 }, { "epoch": 2.33, "learning_rate": 2.503785284062701e-06, "loss": 0.0403, "step": 9926 }, { "epoch": 2.33, "learning_rate": 2.5021081161011475e-06, "loss": 0.0094, "step": 9927 }, { "epoch": 2.33, "learning_rate": 2.5004314297277277e-06, "loss": 0.0309, "step": 9928 }, { "epoch": 2.33, "learning_rate": 2.498755225050141e-06, "loss": 0.0085, "step": 9929 }, { "epoch": 2.33, "learning_rate": 2.4970795021760475e-06, "loss": 0.0029, "step": 9930 }, { "epoch": 2.33, "learning_rate": 2.4954042612130813e-06, "loss": 0.0401, "step": 9931 }, { "epoch": 2.33, "learning_rate": 2.493729502268841e-06, "loss": 0.0286, "step": 9932 }, { "epoch": 2.33, "learning_rate": 2.4920552254508934e-06, "loss": 0.0067, "step": 9933 }, { "epoch": 2.33, "learning_rate": 2.4903814308667816e-06, "loss": 0.0392, "step": 9934 }, { "epoch": 2.33, "learning_rate": 2.4887081186240113e-06, "loss": 0.0336, "step": 9935 }, { "epoch": 2.33, "learning_rate": 2.48703528883006e-06, "loss": 0.0007, "step": 9936 }, { "epoch": 2.33, "learning_rate": 2.4853629415923675e-06, "loss": 0.0135, "step": 9937 }, { "epoch": 2.33, "learning_rate": 2.4836910770183562e-06, "loss": 0.0319, "step": 9938 }, { "epoch": 2.33, "learning_rate": 2.4820196952154065e-06, "loss": 0.0007, "step": 9939 }, { "epoch": 2.33, "learning_rate": 2.4803487962908666e-06, "loss": 0.0011, "step": 9940 }, { "epoch": 2.33, "learning_rate": 2.4786783803520663e-06, "loss": 0.0973, "step": 9941 }, { "epoch": 2.33, "learning_rate": 2.4770084475062915e-06, "loss": 0.001, "step": 9942 }, { "epoch": 2.33, "learning_rate": 2.475338997860802e-06, "loss": 0.0851, "step": 9943 }, { "epoch": 2.33, "learning_rate": 2.4736700315228247e-06, "loss": 0.0127, "step": 9944 }, { "epoch": 2.33, "learning_rate": 2.4720015485995595e-06, "loss": 0.0024, "step": 9945 }, { "epoch": 2.33, "learning_rate": 2.4703335491981696e-06, "loss": 0.0009, "step": 9946 }, { "epoch": 2.33, "learning_rate": 2.4686660334257985e-06, "loss": 0.0088, "step": 9947 }, { "epoch": 2.33, "learning_rate": 2.4669990013895385e-06, "loss": 0.0018, "step": 9948 }, { "epoch": 2.33, "learning_rate": 2.4653324531964683e-06, "loss": 0.0045, "step": 9949 }, { "epoch": 2.34, "learning_rate": 2.463666388953634e-06, "loss": 0.0143, "step": 9950 }, { "epoch": 2.34, "learning_rate": 2.4620008087680423e-06, "loss": 0.0012, "step": 9951 }, { "epoch": 2.34, "learning_rate": 2.460335712746673e-06, "loss": 0.0077, "step": 9952 }, { "epoch": 2.34, "learning_rate": 2.4586711009964738e-06, "loss": 0.0218, "step": 9953 }, { "epoch": 2.34, "learning_rate": 2.4570069736243663e-06, "loss": 0.0018, "step": 9954 }, { "epoch": 2.34, "learning_rate": 2.4553433307372353e-06, "loss": 0.0007, "step": 9955 }, { "epoch": 2.34, "learning_rate": 2.4536801724419335e-06, "loss": 0.0248, "step": 9956 }, { "epoch": 2.34, "learning_rate": 2.4520174988452895e-06, "loss": 0.0003, "step": 9957 }, { "epoch": 2.34, "learning_rate": 2.4503553100540955e-06, "loss": 0.0007, "step": 9958 }, { "epoch": 2.34, "learning_rate": 2.448693606175112e-06, "loss": 0.0334, "step": 9959 }, { "epoch": 2.34, "learning_rate": 2.447032387315068e-06, "loss": 0.0014, "step": 9960 }, { "epoch": 2.34, "learning_rate": 2.4453716535806694e-06, "loss": 0.0226, "step": 9961 }, { "epoch": 2.34, "learning_rate": 2.443711405078577e-06, "loss": 0.0202, "step": 9962 }, { "epoch": 2.34, "learning_rate": 2.4420516419154394e-06, "loss": 0.015, "step": 9963 }, { "epoch": 2.34, "learning_rate": 2.44039236419785e-06, "loss": 0.053, "step": 9964 }, { "epoch": 2.34, "learning_rate": 2.438733572032389e-06, "loss": 0.0007, "step": 9965 }, { "epoch": 2.34, "learning_rate": 2.4370752655256046e-06, "loss": 0.0498, "step": 9966 }, { "epoch": 2.34, "learning_rate": 2.4354174447840063e-06, "loss": 0.0318, "step": 9967 }, { "epoch": 2.34, "learning_rate": 2.4337601099140764e-06, "loss": 0.0016, "step": 9968 }, { "epoch": 2.34, "learning_rate": 2.4321032610222594e-06, "loss": 0.0031, "step": 9969 }, { "epoch": 2.34, "learning_rate": 2.430446898214983e-06, "loss": 0.0006, "step": 9970 }, { "epoch": 2.34, "learning_rate": 2.428791021598631e-06, "loss": 0.003, "step": 9971 }, { "epoch": 2.34, "learning_rate": 2.42713563127956e-06, "loss": 0.0107, "step": 9972 }, { "epoch": 2.34, "learning_rate": 2.4254807273640933e-06, "loss": 0.0012, "step": 9973 }, { "epoch": 2.34, "learning_rate": 2.42382630995853e-06, "loss": 0.0208, "step": 9974 }, { "epoch": 2.34, "learning_rate": 2.422172379169131e-06, "loss": 0.0136, "step": 9975 }, { "epoch": 2.34, "learning_rate": 2.420518935102123e-06, "loss": 0.0234, "step": 9976 }, { "epoch": 2.34, "learning_rate": 2.418865977863715e-06, "loss": 0.0067, "step": 9977 }, { "epoch": 2.34, "learning_rate": 2.417213507560071e-06, "loss": 0.0003, "step": 9978 }, { "epoch": 2.34, "learning_rate": 2.4155615242973306e-06, "loss": 0.0106, "step": 9979 }, { "epoch": 2.34, "learning_rate": 2.4139100281815965e-06, "loss": 0.0083, "step": 9980 }, { "epoch": 2.34, "learning_rate": 2.412259019318949e-06, "loss": 0.0068, "step": 9981 }, { "epoch": 2.34, "learning_rate": 2.4106084978154277e-06, "loss": 0.0355, "step": 9982 }, { "epoch": 2.34, "learning_rate": 2.40895846377705e-06, "loss": 0.0383, "step": 9983 }, { "epoch": 2.34, "learning_rate": 2.4073089173097954e-06, "loss": 0.0916, "step": 9984 }, { "epoch": 2.34, "learning_rate": 2.405659858519611e-06, "loss": 0.017, "step": 9985 }, { "epoch": 2.34, "learning_rate": 2.40401128751242e-06, "loss": 0.0002, "step": 9986 }, { "epoch": 2.34, "learning_rate": 2.402363204394108e-06, "loss": 0.0005, "step": 9987 }, { "epoch": 2.34, "learning_rate": 2.400715609270531e-06, "loss": 0.0128, "step": 9988 }, { "epoch": 2.34, "learning_rate": 2.39906850224751e-06, "loss": 0.0014, "step": 9989 }, { "epoch": 2.34, "learning_rate": 2.397421883430846e-06, "loss": 0.0021, "step": 9990 }, { "epoch": 2.34, "learning_rate": 2.395775752926296e-06, "loss": 0.0131, "step": 9991 }, { "epoch": 2.35, "learning_rate": 2.3941301108395886e-06, "loss": 0.0287, "step": 9992 }, { "epoch": 2.35, "learning_rate": 2.3924849572764295e-06, "loss": 0.0042, "step": 9993 }, { "epoch": 2.35, "learning_rate": 2.390840292342482e-06, "loss": 0.0023, "step": 9994 }, { "epoch": 2.35, "learning_rate": 2.389196116143385e-06, "loss": 0.0103, "step": 9995 }, { "epoch": 2.35, "learning_rate": 2.3875524287847384e-06, "loss": 0.0315, "step": 9996 }, { "epoch": 2.35, "learning_rate": 2.385909230372124e-06, "loss": 0.0041, "step": 9997 }, { "epoch": 2.35, "learning_rate": 2.384266521011076e-06, "loss": 0.0007, "step": 9998 }, { "epoch": 2.35, "learning_rate": 2.3826243008071126e-06, "loss": 0.0005, "step": 9999 }, { "epoch": 2.35, "learning_rate": 2.3809825698657095e-06, "loss": 0.02, "step": 10000 }, { "epoch": 2.35, "learning_rate": 2.379341328292313e-06, "loss": 0.0351, "step": 10001 }, { "epoch": 2.35, "learning_rate": 2.3777005761923443e-06, "loss": 0.0011, "step": 10002 }, { "epoch": 2.35, "learning_rate": 2.376060313671186e-06, "loss": 0.0199, "step": 10003 }, { "epoch": 2.35, "learning_rate": 2.3744205408341924e-06, "loss": 0.0318, "step": 10004 }, { "epoch": 2.35, "learning_rate": 2.3727812577866828e-06, "loss": 0.0228, "step": 10005 }, { "epoch": 2.35, "learning_rate": 2.3711424646339533e-06, "loss": 0.0011, "step": 10006 }, { "epoch": 2.35, "learning_rate": 2.369504161481261e-06, "loss": 0.0422, "step": 10007 }, { "epoch": 2.35, "learning_rate": 2.3678663484338305e-06, "loss": 0.0006, "step": 10008 }, { "epoch": 2.35, "learning_rate": 2.3662290255968635e-06, "loss": 0.0016, "step": 10009 }, { "epoch": 2.35, "learning_rate": 2.3645921930755233e-06, "loss": 0.008, "step": 10010 }, { "epoch": 2.35, "learning_rate": 2.362955850974942e-06, "loss": 0.0022, "step": 10011 }, { "epoch": 2.35, "learning_rate": 2.361319999400219e-06, "loss": 0.0402, "step": 10012 }, { "epoch": 2.35, "learning_rate": 2.359684638456432e-06, "loss": 0.0156, "step": 10013 }, { "epoch": 2.35, "learning_rate": 2.3580497682486146e-06, "loss": 0.0262, "step": 10014 }, { "epoch": 2.35, "learning_rate": 2.3564153888817765e-06, "loss": 0.0142, "step": 10015 }, { "epoch": 2.35, "learning_rate": 2.3547815004608885e-06, "loss": 0.0004, "step": 10016 }, { "epoch": 2.35, "learning_rate": 2.3531481030909e-06, "loss": 0.0003, "step": 10017 }, { "epoch": 2.35, "learning_rate": 2.3515151968767248e-06, "loss": 0.0017, "step": 10018 }, { "epoch": 2.35, "learning_rate": 2.3498827819232416e-06, "loss": 0.0021, "step": 10019 }, { "epoch": 2.35, "learning_rate": 2.3482508583353012e-06, "loss": 0.0026, "step": 10020 }, { "epoch": 2.35, "learning_rate": 2.346619426217719e-06, "loss": 0.0261, "step": 10021 }, { "epoch": 2.35, "learning_rate": 2.3449884856752856e-06, "loss": 0.0005, "step": 10022 }, { "epoch": 2.35, "learning_rate": 2.343358036812754e-06, "loss": 0.0009, "step": 10023 }, { "epoch": 2.35, "learning_rate": 2.3417280797348475e-06, "loss": 0.0089, "step": 10024 }, { "epoch": 2.35, "learning_rate": 2.3400986145462555e-06, "loss": 0.012, "step": 10025 }, { "epoch": 2.35, "learning_rate": 2.338469641351644e-06, "loss": 0.0149, "step": 10026 }, { "epoch": 2.35, "learning_rate": 2.3368411602556384e-06, "loss": 0.0212, "step": 10027 }, { "epoch": 2.35, "learning_rate": 2.335213171362832e-06, "loss": 0.0003, "step": 10028 }, { "epoch": 2.35, "learning_rate": 2.3335856747777972e-06, "loss": 0.0116, "step": 10029 }, { "epoch": 2.35, "learning_rate": 2.3319586706050658e-06, "loss": 0.0025, "step": 10030 }, { "epoch": 2.35, "learning_rate": 2.330332158949138e-06, "loss": 0.0006, "step": 10031 }, { "epoch": 2.35, "learning_rate": 2.3287061399144815e-06, "loss": 0.0167, "step": 10032 }, { "epoch": 2.35, "learning_rate": 2.327080613605539e-06, "loss": 0.0141, "step": 10033 }, { "epoch": 2.35, "learning_rate": 2.325455580126721e-06, "loss": 0.0027, "step": 10034 }, { "epoch": 2.36, "learning_rate": 2.3238310395823992e-06, "loss": 0.0159, "step": 10035 }, { "epoch": 2.36, "learning_rate": 2.3222069920769173e-06, "loss": 0.0168, "step": 10036 }, { "epoch": 2.36, "learning_rate": 2.3205834377145864e-06, "loss": 0.0363, "step": 10037 }, { "epoch": 2.36, "learning_rate": 2.3189603765996904e-06, "loss": 0.0028, "step": 10038 }, { "epoch": 2.36, "learning_rate": 2.3173378088364774e-06, "loss": 0.0006, "step": 10039 }, { "epoch": 2.36, "learning_rate": 2.3157157345291637e-06, "loss": 0.0191, "step": 10040 }, { "epoch": 2.36, "learning_rate": 2.314094153781933e-06, "loss": 0.0148, "step": 10041 }, { "epoch": 2.36, "learning_rate": 2.3124730666989426e-06, "loss": 0.0108, "step": 10042 }, { "epoch": 2.36, "learning_rate": 2.310852473384314e-06, "loss": 0.0212, "step": 10043 }, { "epoch": 2.36, "learning_rate": 2.309232373942133e-06, "loss": 0.0131, "step": 10044 }, { "epoch": 2.36, "learning_rate": 2.307612768476465e-06, "loss": 0.0042, "step": 10045 }, { "epoch": 2.36, "learning_rate": 2.3059936570913345e-06, "loss": 0.004, "step": 10046 }, { "epoch": 2.36, "learning_rate": 2.304375039890736e-06, "loss": 0.0023, "step": 10047 }, { "epoch": 2.36, "learning_rate": 2.3027569169786303e-06, "loss": 0.0083, "step": 10048 }, { "epoch": 2.36, "learning_rate": 2.3011392884589535e-06, "loss": 0.0194, "step": 10049 }, { "epoch": 2.36, "learning_rate": 2.299522154435605e-06, "loss": 0.0193, "step": 10050 }, { "epoch": 2.36, "learning_rate": 2.2979055150124495e-06, "loss": 0.0004, "step": 10051 }, { "epoch": 2.36, "learning_rate": 2.2962893702933276e-06, "loss": 0.0067, "step": 10052 }, { "epoch": 2.36, "learning_rate": 2.29467372038204e-06, "loss": 0.0062, "step": 10053 }, { "epoch": 2.36, "learning_rate": 2.2930585653823643e-06, "loss": 0.0177, "step": 10054 }, { "epoch": 2.36, "learning_rate": 2.2914439053980395e-06, "loss": 0.0773, "step": 10055 }, { "epoch": 2.36, "learning_rate": 2.2898297405327752e-06, "loss": 0.0058, "step": 10056 }, { "epoch": 2.36, "learning_rate": 2.2882160708902444e-06, "loss": 0.0313, "step": 10057 }, { "epoch": 2.36, "learning_rate": 2.2866028965741005e-06, "loss": 0.0004, "step": 10058 }, { "epoch": 2.36, "learning_rate": 2.2849902176879522e-06, "loss": 0.0003, "step": 10059 }, { "epoch": 2.36, "learning_rate": 2.2833780343353808e-06, "loss": 0.0043, "step": 10060 }, { "epoch": 2.36, "learning_rate": 2.2817663466199413e-06, "loss": 0.0053, "step": 10061 }, { "epoch": 2.36, "learning_rate": 2.2801551546451494e-06, "loss": 0.0369, "step": 10062 }, { "epoch": 2.36, "learning_rate": 2.278544458514491e-06, "loss": 0.0073, "step": 10063 }, { "epoch": 2.36, "learning_rate": 2.276934258331419e-06, "loss": 0.0249, "step": 10064 }, { "epoch": 2.36, "learning_rate": 2.275324554199362e-06, "loss": 0.0043, "step": 10065 }, { "epoch": 2.36, "learning_rate": 2.2737153462217065e-06, "loss": 0.0021, "step": 10066 }, { "epoch": 2.36, "learning_rate": 2.2721066345018128e-06, "loss": 0.0215, "step": 10067 }, { "epoch": 2.36, "learning_rate": 2.270498419143006e-06, "loss": 0.0147, "step": 10068 }, { "epoch": 2.36, "learning_rate": 2.268890700248584e-06, "loss": 0.012, "step": 10069 }, { "epoch": 2.36, "learning_rate": 2.267283477921811e-06, "loss": 0.0089, "step": 10070 }, { "epoch": 2.36, "learning_rate": 2.2656767522659185e-06, "loss": 0.0049, "step": 10071 }, { "epoch": 2.36, "learning_rate": 2.2640705233841053e-06, "loss": 0.001, "step": 10072 }, { "epoch": 2.36, "learning_rate": 2.262464791379535e-06, "loss": 0.0058, "step": 10073 }, { "epoch": 2.36, "learning_rate": 2.2608595563553516e-06, "loss": 0.0726, "step": 10074 }, { "epoch": 2.36, "learning_rate": 2.2592548184146535e-06, "loss": 0.0037, "step": 10075 }, { "epoch": 2.36, "learning_rate": 2.257650577660512e-06, "loss": 0.0132, "step": 10076 }, { "epoch": 2.37, "learning_rate": 2.256046834195972e-06, "loss": 0.0112, "step": 10077 }, { "epoch": 2.37, "learning_rate": 2.2544435881240388e-06, "loss": 0.0052, "step": 10078 }, { "epoch": 2.37, "learning_rate": 2.252840839547689e-06, "loss": 0.0364, "step": 10079 }, { "epoch": 2.37, "learning_rate": 2.2512385885698617e-06, "loss": 0.0044, "step": 10080 }, { "epoch": 2.37, "learning_rate": 2.249636835293478e-06, "loss": 0.0449, "step": 10081 }, { "epoch": 2.37, "learning_rate": 2.2480355798214148e-06, "loss": 0.0853, "step": 10082 }, { "epoch": 2.37, "learning_rate": 2.246434822256519e-06, "loss": 0.0155, "step": 10083 }, { "epoch": 2.37, "learning_rate": 2.2448345627016053e-06, "loss": 0.0049, "step": 10084 }, { "epoch": 2.37, "learning_rate": 2.2432348012594605e-06, "loss": 0.006, "step": 10085 }, { "epoch": 2.37, "learning_rate": 2.2416355380328427e-06, "loss": 0.0225, "step": 10086 }, { "epoch": 2.37, "learning_rate": 2.24003677312446e-06, "loss": 0.0023, "step": 10087 }, { "epoch": 2.37, "learning_rate": 2.238438506637011e-06, "loss": 0.0011, "step": 10088 }, { "epoch": 2.37, "learning_rate": 2.236840738673144e-06, "loss": 0.0108, "step": 10089 }, { "epoch": 2.37, "learning_rate": 2.2352434693354918e-06, "loss": 0.0027, "step": 10090 }, { "epoch": 2.37, "learning_rate": 2.2336466987266415e-06, "loss": 0.001, "step": 10091 }, { "epoch": 2.37, "learning_rate": 2.2320504269491548e-06, "loss": 0.0117, "step": 10092 }, { "epoch": 2.37, "learning_rate": 2.2304546541055574e-06, "loss": 0.0559, "step": 10093 }, { "epoch": 2.37, "learning_rate": 2.2288593802983496e-06, "loss": 0.0557, "step": 10094 }, { "epoch": 2.37, "learning_rate": 2.227264605629994e-06, "loss": 0.0063, "step": 10095 }, { "epoch": 2.37, "learning_rate": 2.2256703302029193e-06, "loss": 0.0005, "step": 10096 }, { "epoch": 2.37, "learning_rate": 2.2240765541195306e-06, "loss": 0.0106, "step": 10097 }, { "epoch": 2.37, "learning_rate": 2.222483277482195e-06, "loss": 0.0318, "step": 10098 }, { "epoch": 2.37, "learning_rate": 2.2208905003932444e-06, "loss": 0.0156, "step": 10099 }, { "epoch": 2.37, "learning_rate": 2.219298222954984e-06, "loss": 0.002, "step": 10100 }, { "epoch": 2.37, "learning_rate": 2.2177064452696893e-06, "loss": 0.0135, "step": 10101 }, { "epoch": 2.37, "learning_rate": 2.2161151674395977e-06, "loss": 0.0001, "step": 10102 }, { "epoch": 2.37, "learning_rate": 2.2145243895669132e-06, "loss": 0.0037, "step": 10103 }, { "epoch": 2.37, "learning_rate": 2.2129341117538163e-06, "loss": 0.0451, "step": 10104 }, { "epoch": 2.37, "learning_rate": 2.2113443341024452e-06, "loss": 0.0042, "step": 10105 }, { "epoch": 2.37, "learning_rate": 2.2097550567149195e-06, "loss": 0.0492, "step": 10106 }, { "epoch": 2.37, "learning_rate": 2.2081662796933067e-06, "loss": 0.0176, "step": 10107 }, { "epoch": 2.37, "learning_rate": 2.206578003139663e-06, "loss": 0.0166, "step": 10108 }, { "epoch": 2.37, "learning_rate": 2.2049902271559962e-06, "loss": 0.0035, "step": 10109 }, { "epoch": 2.37, "learning_rate": 2.2034029518442947e-06, "loss": 0.0048, "step": 10110 }, { "epoch": 2.37, "learning_rate": 2.2018161773065073e-06, "loss": 0.022, "step": 10111 }, { "epoch": 2.37, "learning_rate": 2.2002299036445475e-06, "loss": 0.0184, "step": 10112 }, { "epoch": 2.37, "learning_rate": 2.1986441309603077e-06, "loss": 0.0089, "step": 10113 }, { "epoch": 2.37, "learning_rate": 2.197058859355641e-06, "loss": 0.0003, "step": 10114 }, { "epoch": 2.37, "learning_rate": 2.1954740889323656e-06, "loss": 0.0063, "step": 10115 }, { "epoch": 2.37, "learning_rate": 2.1938898197922696e-06, "loss": 0.001, "step": 10116 }, { "epoch": 2.37, "learning_rate": 2.1923060520371175e-06, "loss": 0.0073, "step": 10117 }, { "epoch": 2.37, "learning_rate": 2.19072278576863e-06, "loss": 0.0011, "step": 10118 }, { "epoch": 2.37, "learning_rate": 2.1891400210884973e-06, "loss": 0.0248, "step": 10119 }, { "epoch": 2.38, "learning_rate": 2.1875577580983853e-06, "loss": 0.0264, "step": 10120 }, { "epoch": 2.38, "learning_rate": 2.1859759968999184e-06, "loss": 0.0002, "step": 10121 }, { "epoch": 2.38, "learning_rate": 2.1843947375946994e-06, "loss": 0.0027, "step": 10122 }, { "epoch": 2.38, "learning_rate": 2.182813980284282e-06, "loss": 0.0023, "step": 10123 }, { "epoch": 2.38, "learning_rate": 2.1812337250702066e-06, "loss": 0.0096, "step": 10124 }, { "epoch": 2.38, "learning_rate": 2.1796539720539666e-06, "loss": 0.0011, "step": 10125 }, { "epoch": 2.38, "learning_rate": 2.1780747213370345e-06, "loss": 0.037, "step": 10126 }, { "epoch": 2.38, "learning_rate": 2.1764959730208425e-06, "loss": 0.0149, "step": 10127 }, { "epoch": 2.38, "learning_rate": 2.1749177272067923e-06, "loss": 0.0505, "step": 10128 }, { "epoch": 2.38, "learning_rate": 2.1733399839962576e-06, "loss": 0.0002, "step": 10129 }, { "epoch": 2.38, "learning_rate": 2.171762743490574e-06, "loss": 0.0118, "step": 10130 }, { "epoch": 2.38, "learning_rate": 2.1701860057910495e-06, "loss": 0.0003, "step": 10131 }, { "epoch": 2.38, "learning_rate": 2.1686097709989516e-06, "loss": 0.0006, "step": 10132 }, { "epoch": 2.38, "learning_rate": 2.167034039215531e-06, "loss": 0.0029, "step": 10133 }, { "epoch": 2.38, "learning_rate": 2.1654588105419903e-06, "loss": 0.0035, "step": 10134 }, { "epoch": 2.38, "learning_rate": 2.1638840850795075e-06, "loss": 0.0063, "step": 10135 }, { "epoch": 2.38, "learning_rate": 2.162309862929225e-06, "loss": 0.0189, "step": 10136 }, { "epoch": 2.38, "learning_rate": 2.160736144192256e-06, "loss": 0.0066, "step": 10137 }, { "epoch": 2.38, "learning_rate": 2.159162928969687e-06, "loss": 0.001, "step": 10138 }, { "epoch": 2.38, "learning_rate": 2.157590217362554e-06, "loss": 0.0401, "step": 10139 }, { "epoch": 2.38, "learning_rate": 2.1560180094718797e-06, "loss": 0.0212, "step": 10140 }, { "epoch": 2.38, "learning_rate": 2.154446305398641e-06, "loss": 0.0032, "step": 10141 }, { "epoch": 2.38, "learning_rate": 2.152875105243797e-06, "loss": 0.0022, "step": 10142 }, { "epoch": 2.38, "learning_rate": 2.1513044091082545e-06, "loss": 0.0008, "step": 10143 }, { "epoch": 2.38, "learning_rate": 2.149734217092907e-06, "loss": 0.0162, "step": 10144 }, { "epoch": 2.38, "learning_rate": 2.1481645292986033e-06, "loss": 0.0105, "step": 10145 }, { "epoch": 2.38, "learning_rate": 2.146595345826168e-06, "loss": 0.0339, "step": 10146 }, { "epoch": 2.38, "learning_rate": 2.1450266667763863e-06, "loss": 0.0017, "step": 10147 }, { "epoch": 2.38, "learning_rate": 2.1434584922500146e-06, "loss": 0.0024, "step": 10148 }, { "epoch": 2.38, "learning_rate": 2.1418908223477787e-06, "loss": 0.0022, "step": 10149 }, { "epoch": 2.38, "learning_rate": 2.1403236571703678e-06, "loss": 0.0009, "step": 10150 }, { "epoch": 2.38, "learning_rate": 2.1387569968184405e-06, "loss": 0.0017, "step": 10151 }, { "epoch": 2.38, "learning_rate": 2.137190841392621e-06, "loss": 0.0107, "step": 10152 }, { "epoch": 2.38, "learning_rate": 2.13562519099351e-06, "loss": 0.0109, "step": 10153 }, { "epoch": 2.38, "learning_rate": 2.134060045721663e-06, "loss": 0.0152, "step": 10154 }, { "epoch": 2.38, "learning_rate": 2.1324954056776082e-06, "loss": 0.0069, "step": 10155 }, { "epoch": 2.38, "learning_rate": 2.130931270961847e-06, "loss": 0.0011, "step": 10156 }, { "epoch": 2.38, "learning_rate": 2.129367641674839e-06, "loss": 0.0042, "step": 10157 }, { "epoch": 2.38, "learning_rate": 2.1278045179170215e-06, "loss": 0.0025, "step": 10158 }, { "epoch": 2.38, "learning_rate": 2.1262418997887856e-06, "loss": 0.005, "step": 10159 }, { "epoch": 2.38, "learning_rate": 2.1246797873905057e-06, "loss": 0.0024, "step": 10160 }, { "epoch": 2.38, "learning_rate": 2.1231181808225087e-06, "loss": 0.0003, "step": 10161 }, { "epoch": 2.38, "learning_rate": 2.121557080185103e-06, "loss": 0.0021, "step": 10162 }, { "epoch": 2.39, "learning_rate": 2.1199964855785547e-06, "loss": 0.0177, "step": 10163 }, { "epoch": 2.39, "learning_rate": 2.1184363971030984e-06, "loss": 0.0074, "step": 10164 }, { "epoch": 2.39, "learning_rate": 2.1168768148589426e-06, "loss": 0.0092, "step": 10165 }, { "epoch": 2.39, "learning_rate": 2.1153177389462574e-06, "loss": 0.0253, "step": 10166 }, { "epoch": 2.39, "learning_rate": 2.1137591694651816e-06, "loss": 0.0088, "step": 10167 }, { "epoch": 2.39, "learning_rate": 2.1122011065158188e-06, "loss": 0.0066, "step": 10168 }, { "epoch": 2.39, "learning_rate": 2.1106435501982493e-06, "loss": 0.0, "step": 10169 }, { "epoch": 2.39, "learning_rate": 2.1090865006125107e-06, "loss": 0.0025, "step": 10170 }, { "epoch": 2.39, "learning_rate": 2.1075299578586096e-06, "loss": 0.002, "step": 10171 }, { "epoch": 2.39, "learning_rate": 2.105973922036528e-06, "loss": 0.0347, "step": 10172 }, { "epoch": 2.39, "learning_rate": 2.104418393246206e-06, "loss": 0.0069, "step": 10173 }, { "epoch": 2.39, "learning_rate": 2.10286337158756e-06, "loss": 0.0004, "step": 10174 }, { "epoch": 2.39, "learning_rate": 2.10130885716046e-06, "loss": 0.0166, "step": 10175 }, { "epoch": 2.39, "learning_rate": 2.09975485006476e-06, "loss": 0.0069, "step": 10176 }, { "epoch": 2.39, "learning_rate": 2.0982013504002672e-06, "loss": 0.0034, "step": 10177 }, { "epoch": 2.39, "learning_rate": 2.096648358266772e-06, "loss": 0.0001, "step": 10178 }, { "epoch": 2.39, "learning_rate": 2.0950958737640117e-06, "loss": 0.0116, "step": 10179 }, { "epoch": 2.39, "learning_rate": 2.0935438969917065e-06, "loss": 0.0003, "step": 10180 }, { "epoch": 2.39, "learning_rate": 2.091992428049543e-06, "loss": 0.0104, "step": 10181 }, { "epoch": 2.39, "learning_rate": 2.0904414670371688e-06, "loss": 0.0136, "step": 10182 }, { "epoch": 2.39, "learning_rate": 2.088891014054202e-06, "loss": 0.0011, "step": 10183 }, { "epoch": 2.39, "learning_rate": 2.0873410692002247e-06, "loss": 0.0003, "step": 10184 }, { "epoch": 2.39, "learning_rate": 2.0857916325747963e-06, "loss": 0.0347, "step": 10185 }, { "epoch": 2.39, "learning_rate": 2.084242704277434e-06, "loss": 0.0098, "step": 10186 }, { "epoch": 2.39, "learning_rate": 2.0826942844076227e-06, "loss": 0.0003, "step": 10187 }, { "epoch": 2.39, "learning_rate": 2.081146373064816e-06, "loss": 0.0133, "step": 10188 }, { "epoch": 2.39, "learning_rate": 2.07959897034844e-06, "loss": 0.0075, "step": 10189 }, { "epoch": 2.39, "learning_rate": 2.0780520763578882e-06, "loss": 0.0012, "step": 10190 }, { "epoch": 2.39, "learning_rate": 2.0765056911925064e-06, "loss": 0.0006, "step": 10191 }, { "epoch": 2.39, "learning_rate": 2.074959814951626e-06, "loss": 0.0469, "step": 10192 }, { "epoch": 2.39, "learning_rate": 2.073414447734534e-06, "loss": 0.0039, "step": 10193 }, { "epoch": 2.39, "learning_rate": 2.0718695896404985e-06, "loss": 0.0009, "step": 10194 }, { "epoch": 2.39, "learning_rate": 2.0703252407687315e-06, "loss": 0.0007, "step": 10195 }, { "epoch": 2.39, "learning_rate": 2.068781401218436e-06, "loss": 0.0032, "step": 10196 }, { "epoch": 2.39, "learning_rate": 2.0672380710887674e-06, "loss": 0.0032, "step": 10197 }, { "epoch": 2.39, "learning_rate": 2.0656952504788573e-06, "loss": 0.0094, "step": 10198 }, { "epoch": 2.39, "learning_rate": 2.0641529394878013e-06, "loss": 0.0248, "step": 10199 }, { "epoch": 2.39, "learning_rate": 2.062611138214655e-06, "loss": 0.0003, "step": 10200 }, { "epoch": 2.39, "learning_rate": 2.0610698467584567e-06, "loss": 0.0004, "step": 10201 }, { "epoch": 2.39, "learning_rate": 2.059529065218199e-06, "loss": 0.001, "step": 10202 }, { "epoch": 2.39, "learning_rate": 2.0579887936928466e-06, "loss": 0.0004, "step": 10203 }, { "epoch": 2.39, "learning_rate": 2.056449032281327e-06, "loss": 0.0044, "step": 10204 }, { "epoch": 2.4, "learning_rate": 2.0549097810825423e-06, "loss": 0.0178, "step": 10205 }, { "epoch": 2.4, "learning_rate": 2.0533710401953644e-06, "loss": 0.0087, "step": 10206 }, { "epoch": 2.4, "learning_rate": 2.0518328097186137e-06, "loss": 0.0082, "step": 10207 }, { "epoch": 2.4, "learning_rate": 2.050295089751101e-06, "loss": 0.0024, "step": 10208 }, { "epoch": 2.4, "learning_rate": 2.0487578803915853e-06, "loss": 0.0209, "step": 10209 }, { "epoch": 2.4, "learning_rate": 2.047221181738812e-06, "loss": 0.0006, "step": 10210 }, { "epoch": 2.4, "learning_rate": 2.0456849938914715e-06, "loss": 0.0013, "step": 10211 }, { "epoch": 2.4, "learning_rate": 2.044149316948241e-06, "loss": 0.0219, "step": 10212 }, { "epoch": 2.4, "learning_rate": 2.0426141510077503e-06, "loss": 0.0017, "step": 10213 }, { "epoch": 2.4, "learning_rate": 2.0410794961686087e-06, "loss": 0.0022, "step": 10214 }, { "epoch": 2.4, "learning_rate": 2.039545352529384e-06, "loss": 0.0064, "step": 10215 }, { "epoch": 2.4, "learning_rate": 2.0380117201886118e-06, "loss": 0.0461, "step": 10216 }, { "epoch": 2.4, "learning_rate": 2.036478599244802e-06, "loss": 0.0213, "step": 10217 }, { "epoch": 2.4, "learning_rate": 2.0349459897964253e-06, "loss": 0.0065, "step": 10218 }, { "epoch": 2.4, "learning_rate": 2.033413891941919e-06, "loss": 0.0215, "step": 10219 }, { "epoch": 2.4, "learning_rate": 2.0318823057796867e-06, "loss": 0.0007, "step": 10220 }, { "epoch": 2.4, "learning_rate": 2.030351231408109e-06, "loss": 0.0013, "step": 10221 }, { "epoch": 2.4, "learning_rate": 2.028820668925522e-06, "loss": 0.0077, "step": 10222 }, { "epoch": 2.4, "learning_rate": 2.0272906184302323e-06, "loss": 0.0039, "step": 10223 }, { "epoch": 2.4, "learning_rate": 2.0257610800205185e-06, "loss": 0.0015, "step": 10224 }, { "epoch": 2.4, "learning_rate": 2.024232053794619e-06, "loss": 0.0086, "step": 10225 }, { "epoch": 2.4, "learning_rate": 2.02270353985075e-06, "loss": 0.0042, "step": 10226 }, { "epoch": 2.4, "learning_rate": 2.0211755382870758e-06, "loss": 0.0207, "step": 10227 }, { "epoch": 2.4, "learning_rate": 2.01964804920175e-06, "loss": 0.0012, "step": 10228 }, { "epoch": 2.4, "learning_rate": 2.018121072692876e-06, "loss": 0.0002, "step": 10229 }, { "epoch": 2.4, "learning_rate": 2.0165946088585397e-06, "loss": 0.0015, "step": 10230 }, { "epoch": 2.4, "learning_rate": 2.0150686577967748e-06, "loss": 0.0334, "step": 10231 }, { "epoch": 2.4, "learning_rate": 2.0135432196055973e-06, "loss": 0.0127, "step": 10232 }, { "epoch": 2.4, "learning_rate": 2.0120182943829902e-06, "loss": 0.0097, "step": 10233 }, { "epoch": 2.4, "learning_rate": 2.010493882226896e-06, "loss": 0.015, "step": 10234 }, { "epoch": 2.4, "learning_rate": 2.008969983235226e-06, "loss": 0.0005, "step": 10235 }, { "epoch": 2.4, "learning_rate": 2.0074465975058586e-06, "loss": 0.0023, "step": 10236 }, { "epoch": 2.4, "learning_rate": 2.005923725136646e-06, "loss": 0.0051, "step": 10237 }, { "epoch": 2.4, "learning_rate": 2.004401366225398e-06, "loss": 0.0189, "step": 10238 }, { "epoch": 2.4, "learning_rate": 2.002879520869895e-06, "loss": 0.0147, "step": 10239 }, { "epoch": 2.4, "learning_rate": 2.001358189167888e-06, "loss": 0.0055, "step": 10240 }, { "epoch": 2.4, "learning_rate": 1.999837371217087e-06, "loss": 0.0063, "step": 10241 }, { "epoch": 2.4, "learning_rate": 1.998317067115183e-06, "loss": 0.0309, "step": 10242 }, { "epoch": 2.4, "learning_rate": 1.9967972769598132e-06, "loss": 0.0052, "step": 10243 }, { "epoch": 2.4, "learning_rate": 1.9952780008486017e-06, "loss": 0.0074, "step": 10244 }, { "epoch": 2.4, "learning_rate": 1.993759238879125e-06, "loss": 0.0292, "step": 10245 }, { "epoch": 2.4, "learning_rate": 1.9922409911489426e-06, "loss": 0.0282, "step": 10246 }, { "epoch": 2.4, "learning_rate": 1.990723257755559e-06, "loss": 0.0278, "step": 10247 }, { "epoch": 2.41, "learning_rate": 1.989206038796464e-06, "loss": 0.0057, "step": 10248 }, { "epoch": 2.41, "learning_rate": 1.9876893343691117e-06, "loss": 0.0208, "step": 10249 }, { "epoch": 2.41, "learning_rate": 1.9861731445709164e-06, "loss": 0.0038, "step": 10250 }, { "epoch": 2.41, "learning_rate": 1.9846574694992616e-06, "loss": 0.0588, "step": 10251 }, { "epoch": 2.41, "learning_rate": 1.983142309251498e-06, "loss": 0.0014, "step": 10252 }, { "epoch": 2.41, "learning_rate": 1.9816276639249476e-06, "loss": 0.015, "step": 10253 }, { "epoch": 2.41, "learning_rate": 1.9801135336168943e-06, "loss": 0.0188, "step": 10254 }, { "epoch": 2.41, "learning_rate": 1.978599918424591e-06, "loss": 0.0231, "step": 10255 }, { "epoch": 2.41, "learning_rate": 1.9770868184452517e-06, "loss": 0.0136, "step": 10256 }, { "epoch": 2.41, "learning_rate": 1.975574233776071e-06, "loss": 0.0014, "step": 10257 }, { "epoch": 2.41, "learning_rate": 1.9740621645141967e-06, "loss": 0.0075, "step": 10258 }, { "epoch": 2.41, "learning_rate": 1.9725506107567483e-06, "loss": 0.0271, "step": 10259 }, { "epoch": 2.41, "learning_rate": 1.971039572600816e-06, "loss": 0.0159, "step": 10260 }, { "epoch": 2.41, "learning_rate": 1.9695290501434485e-06, "loss": 0.0012, "step": 10261 }, { "epoch": 2.41, "learning_rate": 1.968019043481675e-06, "loss": 0.0054, "step": 10262 }, { "epoch": 2.41, "learning_rate": 1.966509552712472e-06, "loss": 0.0001, "step": 10263 }, { "epoch": 2.41, "learning_rate": 1.965000577932802e-06, "loss": 0.0064, "step": 10264 }, { "epoch": 2.41, "learning_rate": 1.9634921192395805e-06, "loss": 0.0007, "step": 10265 }, { "epoch": 2.41, "learning_rate": 1.961984176729701e-06, "loss": 0.0005, "step": 10266 }, { "epoch": 2.41, "learning_rate": 1.9604767505000157e-06, "loss": 0.0017, "step": 10267 }, { "epoch": 2.41, "learning_rate": 1.9589698406473433e-06, "loss": 0.0016, "step": 10268 }, { "epoch": 2.41, "learning_rate": 1.9574634472684785e-06, "loss": 0.0024, "step": 10269 }, { "epoch": 2.41, "learning_rate": 1.955957570460174e-06, "loss": 0.0022, "step": 10270 }, { "epoch": 2.41, "learning_rate": 1.95445221031915e-06, "loss": 0.001, "step": 10271 }, { "epoch": 2.41, "learning_rate": 1.952947366942095e-06, "loss": 0.0025, "step": 10272 }, { "epoch": 2.41, "learning_rate": 1.951443040425669e-06, "loss": 0.0252, "step": 10273 }, { "epoch": 2.41, "learning_rate": 1.9499392308664935e-06, "loss": 0.0045, "step": 10274 }, { "epoch": 2.41, "learning_rate": 1.948435938361153e-06, "loss": 0.0007, "step": 10275 }, { "epoch": 2.41, "learning_rate": 1.9469331630062092e-06, "loss": 0.0088, "step": 10276 }, { "epoch": 2.41, "learning_rate": 1.9454309048981822e-06, "loss": 0.0092, "step": 10277 }, { "epoch": 2.41, "learning_rate": 1.943929164133568e-06, "loss": 0.0025, "step": 10278 }, { "epoch": 2.41, "learning_rate": 1.9424279408088122e-06, "loss": 0.0386, "step": 10279 }, { "epoch": 2.41, "learning_rate": 1.9409272350203454e-06, "loss": 0.0002, "step": 10280 }, { "epoch": 2.41, "learning_rate": 1.9394270468645548e-06, "loss": 0.012, "step": 10281 }, { "epoch": 2.41, "learning_rate": 1.9379273764378003e-06, "loss": 0.0014, "step": 10282 }, { "epoch": 2.41, "learning_rate": 1.936428223836404e-06, "loss": 0.0558, "step": 10283 }, { "epoch": 2.41, "learning_rate": 1.934929589156653e-06, "loss": 0.0108, "step": 10284 }, { "epoch": 2.41, "learning_rate": 1.93343147249481e-06, "loss": 0.0297, "step": 10285 }, { "epoch": 2.41, "learning_rate": 1.931933873947095e-06, "loss": 0.0189, "step": 10286 }, { "epoch": 2.41, "learning_rate": 1.9304367936097e-06, "loss": 0.0016, "step": 10287 }, { "epoch": 2.41, "learning_rate": 1.9289402315787787e-06, "loss": 0.0001, "step": 10288 }, { "epoch": 2.41, "learning_rate": 1.92744418795046e-06, "loss": 0.0027, "step": 10289 }, { "epoch": 2.41, "learning_rate": 1.9259486628208323e-06, "loss": 0.0005, "step": 10290 }, { "epoch": 2.42, "learning_rate": 1.9244536562859507e-06, "loss": 0.0135, "step": 10291 }, { "epoch": 2.42, "learning_rate": 1.9229591684418435e-06, "loss": 0.0006, "step": 10292 }, { "epoch": 2.42, "learning_rate": 1.9214651993845002e-06, "loss": 0.013, "step": 10293 }, { "epoch": 2.42, "learning_rate": 1.919971749209876e-06, "loss": 0.0057, "step": 10294 }, { "epoch": 2.42, "learning_rate": 1.9184788180138934e-06, "loss": 0.0012, "step": 10295 }, { "epoch": 2.42, "learning_rate": 1.9169864058924493e-06, "loss": 0.0225, "step": 10296 }, { "epoch": 2.42, "learning_rate": 1.915494512941394e-06, "loss": 0.035, "step": 10297 }, { "epoch": 2.42, "learning_rate": 1.9140031392565594e-06, "loss": 0.0113, "step": 10298 }, { "epoch": 2.42, "learning_rate": 1.9125122849337276e-06, "loss": 0.0018, "step": 10299 }, { "epoch": 2.42, "learning_rate": 1.91102195006866e-06, "loss": 0.0017, "step": 10300 }, { "epoch": 2.42, "learning_rate": 1.9095321347570827e-06, "loss": 0.0023, "step": 10301 }, { "epoch": 2.42, "learning_rate": 1.9080428390946837e-06, "loss": 0.0194, "step": 10302 }, { "epoch": 2.42, "learning_rate": 1.9065540631771218e-06, "loss": 0.0007, "step": 10303 }, { "epoch": 2.42, "learning_rate": 1.905065807100016e-06, "loss": 0.0111, "step": 10304 }, { "epoch": 2.42, "learning_rate": 1.9035780709589624e-06, "loss": 0.0116, "step": 10305 }, { "epoch": 2.42, "learning_rate": 1.9020908548495165e-06, "loss": 0.0143, "step": 10306 }, { "epoch": 2.42, "learning_rate": 1.900604158867201e-06, "loss": 0.0693, "step": 10307 }, { "epoch": 2.42, "learning_rate": 1.8991179831075036e-06, "loss": 0.0003, "step": 10308 }, { "epoch": 2.42, "learning_rate": 1.897632327665886e-06, "loss": 0.0019, "step": 10309 }, { "epoch": 2.42, "learning_rate": 1.8961471926377695e-06, "loss": 0.0025, "step": 10310 }, { "epoch": 2.42, "learning_rate": 1.8946625781185412e-06, "loss": 0.0, "step": 10311 }, { "epoch": 2.42, "learning_rate": 1.8931784842035639e-06, "loss": 0.0066, "step": 10312 }, { "epoch": 2.42, "learning_rate": 1.8916949109881533e-06, "loss": 0.0008, "step": 10313 }, { "epoch": 2.42, "learning_rate": 1.8902118585676077e-06, "loss": 0.0017, "step": 10314 }, { "epoch": 2.42, "learning_rate": 1.8887293270371744e-06, "loss": 0.0096, "step": 10315 }, { "epoch": 2.42, "learning_rate": 1.8872473164920813e-06, "loss": 0.009, "step": 10316 }, { "epoch": 2.42, "learning_rate": 1.8857658270275148e-06, "loss": 0.004, "step": 10317 }, { "epoch": 2.42, "learning_rate": 1.8842848587386341e-06, "loss": 0.0038, "step": 10318 }, { "epoch": 2.42, "learning_rate": 1.8828044117205601e-06, "loss": 0.0107, "step": 10319 }, { "epoch": 2.42, "learning_rate": 1.8813244860683788e-06, "loss": 0.0097, "step": 10320 }, { "epoch": 2.42, "learning_rate": 1.8798450818771496e-06, "loss": 0.0228, "step": 10321 }, { "epoch": 2.42, "learning_rate": 1.8783661992418933e-06, "loss": 0.0001, "step": 10322 }, { "epoch": 2.42, "learning_rate": 1.8768878382575983e-06, "loss": 0.0274, "step": 10323 }, { "epoch": 2.42, "learning_rate": 1.8754099990192154e-06, "loss": 0.0096, "step": 10324 }, { "epoch": 2.42, "learning_rate": 1.8739326816216718e-06, "loss": 0.0023, "step": 10325 }, { "epoch": 2.42, "learning_rate": 1.872455886159853e-06, "loss": 0.0025, "step": 10326 }, { "epoch": 2.42, "learning_rate": 1.8709796127286107e-06, "loss": 0.0514, "step": 10327 }, { "epoch": 2.42, "learning_rate": 1.86950386142277e-06, "loss": 0.0392, "step": 10328 }, { "epoch": 2.42, "learning_rate": 1.8680286323371166e-06, "loss": 0.0034, "step": 10329 }, { "epoch": 2.42, "learning_rate": 1.8665539255664034e-06, "loss": 0.0492, "step": 10330 }, { "epoch": 2.42, "learning_rate": 1.865079741205349e-06, "loss": 0.0005, "step": 10331 }, { "epoch": 2.42, "learning_rate": 1.8636060793486432e-06, "loss": 0.0114, "step": 10332 }, { "epoch": 2.43, "learning_rate": 1.862132940090936e-06, "loss": 0.0195, "step": 10333 }, { "epoch": 2.43, "learning_rate": 1.8606603235268505e-06, "loss": 0.0589, "step": 10334 }, { "epoch": 2.43, "learning_rate": 1.859188229750971e-06, "loss": 0.0509, "step": 10335 }, { "epoch": 2.43, "learning_rate": 1.8577166588578465e-06, "loss": 0.0002, "step": 10336 }, { "epoch": 2.43, "learning_rate": 1.8562456109420025e-06, "loss": 0.0019, "step": 10337 }, { "epoch": 2.43, "learning_rate": 1.8547750860979185e-06, "loss": 0.0204, "step": 10338 }, { "epoch": 2.43, "learning_rate": 1.8533050844200484e-06, "loss": 0.0004, "step": 10339 }, { "epoch": 2.43, "learning_rate": 1.8518356060028064e-06, "loss": 0.0017, "step": 10340 }, { "epoch": 2.43, "learning_rate": 1.8503666509405827e-06, "loss": 0.0038, "step": 10341 }, { "epoch": 2.43, "learning_rate": 1.8488982193277249e-06, "loss": 0.0011, "step": 10342 }, { "epoch": 2.43, "learning_rate": 1.847430311258549e-06, "loss": 0.019, "step": 10343 }, { "epoch": 2.43, "learning_rate": 1.8459629268273415e-06, "loss": 0.0094, "step": 10344 }, { "epoch": 2.43, "learning_rate": 1.8444960661283507e-06, "loss": 0.0058, "step": 10345 }, { "epoch": 2.43, "learning_rate": 1.843029729255792e-06, "loss": 0.0609, "step": 10346 }, { "epoch": 2.43, "learning_rate": 1.8415639163038456e-06, "loss": 0.0539, "step": 10347 }, { "epoch": 2.43, "learning_rate": 1.8400986273666665e-06, "loss": 0.018, "step": 10348 }, { "epoch": 2.43, "learning_rate": 1.838633862538367e-06, "loss": 0.0241, "step": 10349 }, { "epoch": 2.43, "learning_rate": 1.8371696219130287e-06, "loss": 0.0066, "step": 10350 }, { "epoch": 2.43, "learning_rate": 1.835705905584696e-06, "loss": 0.0023, "step": 10351 }, { "epoch": 2.43, "learning_rate": 1.8342427136473862e-06, "loss": 0.0229, "step": 10352 }, { "epoch": 2.43, "learning_rate": 1.8327800461950828e-06, "loss": 0.0077, "step": 10353 }, { "epoch": 2.43, "learning_rate": 1.8313179033217287e-06, "loss": 0.0253, "step": 10354 }, { "epoch": 2.43, "learning_rate": 1.8298562851212398e-06, "loss": 0.0211, "step": 10355 }, { "epoch": 2.43, "learning_rate": 1.8283951916874898e-06, "loss": 0.0008, "step": 10356 }, { "epoch": 2.43, "learning_rate": 1.8269346231143325e-06, "loss": 0.0044, "step": 10357 }, { "epoch": 2.43, "learning_rate": 1.8254745794955752e-06, "loss": 0.03, "step": 10358 }, { "epoch": 2.43, "learning_rate": 1.8240150609249962e-06, "loss": 0.0343, "step": 10359 }, { "epoch": 2.43, "learning_rate": 1.8225560674963394e-06, "loss": 0.0355, "step": 10360 }, { "epoch": 2.43, "learning_rate": 1.8210975993033198e-06, "loss": 0.0014, "step": 10361 }, { "epoch": 2.43, "learning_rate": 1.8196396564396113e-06, "loss": 0.0008, "step": 10362 }, { "epoch": 2.43, "learning_rate": 1.8181822389988557e-06, "loss": 0.0003, "step": 10363 }, { "epoch": 2.43, "learning_rate": 1.816725347074667e-06, "loss": 0.0045, "step": 10364 }, { "epoch": 2.43, "learning_rate": 1.8152689807606184e-06, "loss": 0.0345, "step": 10365 }, { "epoch": 2.43, "learning_rate": 1.8138131401502534e-06, "loss": 0.0002, "step": 10366 }, { "epoch": 2.43, "learning_rate": 1.8123578253370766e-06, "loss": 0.0053, "step": 10367 }, { "epoch": 2.43, "learning_rate": 1.8109030364145651e-06, "loss": 0.0674, "step": 10368 }, { "epoch": 2.43, "learning_rate": 1.8094487734761634e-06, "loss": 0.0281, "step": 10369 }, { "epoch": 2.43, "learning_rate": 1.8079950366152742e-06, "loss": 0.0121, "step": 10370 }, { "epoch": 2.43, "learning_rate": 1.806541825925272e-06, "loss": 0.037, "step": 10371 }, { "epoch": 2.43, "learning_rate": 1.8050891414994943e-06, "loss": 0.0237, "step": 10372 }, { "epoch": 2.43, "learning_rate": 1.8036369834312494e-06, "loss": 0.0003, "step": 10373 }, { "epoch": 2.43, "learning_rate": 1.802185351813809e-06, "loss": 0.0002, "step": 10374 }, { "epoch": 2.43, "learning_rate": 1.8007342467404098e-06, "loss": 0.0002, "step": 10375 }, { "epoch": 2.44, "learning_rate": 1.7992836683042548e-06, "loss": 0.045, "step": 10376 }, { "epoch": 2.44, "learning_rate": 1.7978336165985176e-06, "loss": 0.0039, "step": 10377 }, { "epoch": 2.44, "learning_rate": 1.7963840917163343e-06, "loss": 0.02, "step": 10378 }, { "epoch": 2.44, "learning_rate": 1.7949350937508025e-06, "loss": 0.0284, "step": 10379 }, { "epoch": 2.44, "learning_rate": 1.7934866227949987e-06, "loss": 0.0063, "step": 10380 }, { "epoch": 2.44, "learning_rate": 1.792038678941953e-06, "loss": 0.061, "step": 10381 }, { "epoch": 2.44, "learning_rate": 1.7905912622846688e-06, "loss": 0.016, "step": 10382 }, { "epoch": 2.44, "learning_rate": 1.7891443729161095e-06, "loss": 0.0021, "step": 10383 }, { "epoch": 2.44, "learning_rate": 1.7876980109292142e-06, "loss": 0.0059, "step": 10384 }, { "epoch": 2.44, "learning_rate": 1.7862521764168794e-06, "loss": 0.0276, "step": 10385 }, { "epoch": 2.44, "learning_rate": 1.784806869471968e-06, "loss": 0.0431, "step": 10386 }, { "epoch": 2.44, "learning_rate": 1.7833620901873182e-06, "loss": 0.032, "step": 10387 }, { "epoch": 2.44, "learning_rate": 1.7819178386557212e-06, "loss": 0.0045, "step": 10388 }, { "epoch": 2.44, "learning_rate": 1.7804741149699468e-06, "loss": 0.0004, "step": 10389 }, { "epoch": 2.44, "learning_rate": 1.779030919222723e-06, "loss": 0.0061, "step": 10390 }, { "epoch": 2.44, "learning_rate": 1.7775882515067455e-06, "loss": 0.0056, "step": 10391 }, { "epoch": 2.44, "learning_rate": 1.776146111914674e-06, "loss": 0.0503, "step": 10392 }, { "epoch": 2.44, "learning_rate": 1.7747045005391417e-06, "loss": 0.0014, "step": 10393 }, { "epoch": 2.44, "learning_rate": 1.7732634174727414e-06, "loss": 0.0012, "step": 10394 }, { "epoch": 2.44, "learning_rate": 1.7718228628080292e-06, "loss": 0.0054, "step": 10395 }, { "epoch": 2.44, "learning_rate": 1.7703828366375386e-06, "loss": 0.0025, "step": 10396 }, { "epoch": 2.44, "learning_rate": 1.7689433390537592e-06, "loss": 0.0314, "step": 10397 }, { "epoch": 2.44, "learning_rate": 1.7675043701491502e-06, "loss": 0.0221, "step": 10398 }, { "epoch": 2.44, "learning_rate": 1.766065930016132e-06, "loss": 0.0495, "step": 10399 }, { "epoch": 2.44, "learning_rate": 1.764628018747101e-06, "loss": 0.0471, "step": 10400 }, { "epoch": 2.44, "learning_rate": 1.7631906364344132e-06, "loss": 0.0356, "step": 10401 }, { "epoch": 2.44, "learning_rate": 1.7617537831703891e-06, "loss": 0.047, "step": 10402 }, { "epoch": 2.44, "learning_rate": 1.760317459047316e-06, "loss": 0.0003, "step": 10403 }, { "epoch": 2.44, "learning_rate": 1.7588816641574513e-06, "loss": 0.0369, "step": 10404 }, { "epoch": 2.44, "learning_rate": 1.757446398593018e-06, "loss": 0.0009, "step": 10405 }, { "epoch": 2.44, "learning_rate": 1.7560116624462008e-06, "loss": 0.0032, "step": 10406 }, { "epoch": 2.44, "learning_rate": 1.7545774558091521e-06, "loss": 0.0104, "step": 10407 }, { "epoch": 2.44, "learning_rate": 1.753143778773989e-06, "loss": 0.0199, "step": 10408 }, { "epoch": 2.44, "learning_rate": 1.7517106314328002e-06, "loss": 0.0024, "step": 10409 }, { "epoch": 2.44, "learning_rate": 1.7502780138776354e-06, "loss": 0.0184, "step": 10410 }, { "epoch": 2.44, "learning_rate": 1.7488459262005076e-06, "loss": 0.0019, "step": 10411 }, { "epoch": 2.44, "learning_rate": 1.7474143684934041e-06, "loss": 0.0393, "step": 10412 }, { "epoch": 2.44, "learning_rate": 1.7459833408482717e-06, "loss": 0.0229, "step": 10413 }, { "epoch": 2.44, "learning_rate": 1.7445528433570258e-06, "loss": 0.0054, "step": 10414 }, { "epoch": 2.44, "learning_rate": 1.7431228761115448e-06, "loss": 0.0052, "step": 10415 }, { "epoch": 2.44, "learning_rate": 1.7416934392036778e-06, "loss": 0.0138, "step": 10416 }, { "epoch": 2.44, "learning_rate": 1.7402645327252366e-06, "loss": 0.0018, "step": 10417 }, { "epoch": 2.45, "learning_rate": 1.7388361567680002e-06, "loss": 0.0195, "step": 10418 }, { "epoch": 2.45, "learning_rate": 1.7374083114237084e-06, "loss": 0.0029, "step": 10419 }, { "epoch": 2.45, "learning_rate": 1.7359809967840745e-06, "loss": 0.0039, "step": 10420 }, { "epoch": 2.45, "learning_rate": 1.734554212940781e-06, "loss": 0.029, "step": 10421 }, { "epoch": 2.45, "learning_rate": 1.733127959985459e-06, "loss": 0.0583, "step": 10422 }, { "epoch": 2.45, "learning_rate": 1.7317022380097237e-06, "loss": 0.001, "step": 10423 }, { "epoch": 2.45, "learning_rate": 1.7302770471051444e-06, "loss": 0.0077, "step": 10424 }, { "epoch": 2.45, "learning_rate": 1.7288523873632656e-06, "loss": 0.0017, "step": 10425 }, { "epoch": 2.45, "learning_rate": 1.7274282588755898e-06, "loss": 0.0376, "step": 10426 }, { "epoch": 2.45, "learning_rate": 1.7260046617335902e-06, "loss": 0.0006, "step": 10427 }, { "epoch": 2.45, "learning_rate": 1.7245815960287004e-06, "loss": 0.0016, "step": 10428 }, { "epoch": 2.45, "learning_rate": 1.7231590618523287e-06, "loss": 0.001, "step": 10429 }, { "epoch": 2.45, "learning_rate": 1.7217370592958426e-06, "loss": 0.0012, "step": 10430 }, { "epoch": 2.45, "learning_rate": 1.720315588450573e-06, "loss": 0.0043, "step": 10431 }, { "epoch": 2.45, "learning_rate": 1.7188946494078262e-06, "loss": 0.0144, "step": 10432 }, { "epoch": 2.45, "learning_rate": 1.7174742422588675e-06, "loss": 0.0405, "step": 10433 }, { "epoch": 2.45, "learning_rate": 1.7160543670949282e-06, "loss": 0.0014, "step": 10434 }, { "epoch": 2.45, "learning_rate": 1.7146350240072051e-06, "loss": 0.0579, "step": 10435 }, { "epoch": 2.45, "learning_rate": 1.7132162130868658e-06, "loss": 0.0012, "step": 10436 }, { "epoch": 2.45, "learning_rate": 1.711797934425039e-06, "loss": 0.0003, "step": 10437 }, { "epoch": 2.45, "learning_rate": 1.7103801881128168e-06, "loss": 0.0178, "step": 10438 }, { "epoch": 2.45, "learning_rate": 1.7089629742412682e-06, "loss": 0.0025, "step": 10439 }, { "epoch": 2.45, "learning_rate": 1.707546292901413e-06, "loss": 0.0005, "step": 10440 }, { "epoch": 2.45, "learning_rate": 1.7061301441842504e-06, "loss": 0.0497, "step": 10441 }, { "epoch": 2.45, "learning_rate": 1.7047145281807364e-06, "loss": 0.0002, "step": 10442 }, { "epoch": 2.45, "learning_rate": 1.7032994449817963e-06, "loss": 0.0005, "step": 10443 }, { "epoch": 2.45, "learning_rate": 1.7018848946783173e-06, "loss": 0.001, "step": 10444 }, { "epoch": 2.45, "learning_rate": 1.7004708773611621e-06, "loss": 0.0019, "step": 10445 }, { "epoch": 2.45, "learning_rate": 1.6990573931211495e-06, "loss": 0.0371, "step": 10446 }, { "epoch": 2.45, "learning_rate": 1.6976444420490645e-06, "loss": 0.007, "step": 10447 }, { "epoch": 2.45, "learning_rate": 1.6962320242356667e-06, "loss": 0.0005, "step": 10448 }, { "epoch": 2.45, "learning_rate": 1.6948201397716713e-06, "loss": 0.0215, "step": 10449 }, { "epoch": 2.45, "learning_rate": 1.6934087887477646e-06, "loss": 0.1076, "step": 10450 }, { "epoch": 2.45, "learning_rate": 1.6919979712545942e-06, "loss": 0.016, "step": 10451 }, { "epoch": 2.45, "learning_rate": 1.690587687382782e-06, "loss": 0.0023, "step": 10452 }, { "epoch": 2.45, "learning_rate": 1.6891779372229077e-06, "loss": 0.0248, "step": 10453 }, { "epoch": 2.45, "learning_rate": 1.687768720865518e-06, "loss": 0.0017, "step": 10454 }, { "epoch": 2.45, "learning_rate": 1.6863600384011291e-06, "loss": 0.0035, "step": 10455 }, { "epoch": 2.45, "learning_rate": 1.6849518899202166e-06, "loss": 0.0004, "step": 10456 }, { "epoch": 2.45, "learning_rate": 1.6835442755132336e-06, "loss": 0.0021, "step": 10457 }, { "epoch": 2.45, "learning_rate": 1.6821371952705812e-06, "loss": 0.0513, "step": 10458 }, { "epoch": 2.45, "learning_rate": 1.6807306492826426e-06, "loss": 0.021, "step": 10459 }, { "epoch": 2.45, "learning_rate": 1.6793246376397543e-06, "loss": 0.0351, "step": 10460 }, { "epoch": 2.46, "learning_rate": 1.6779191604322298e-06, "loss": 0.0507, "step": 10461 }, { "epoch": 2.46, "learning_rate": 1.6765142177503413e-06, "loss": 0.0002, "step": 10462 }, { "epoch": 2.46, "learning_rate": 1.6751098096843232e-06, "loss": 0.0228, "step": 10463 }, { "epoch": 2.46, "learning_rate": 1.6737059363243868e-06, "loss": 0.0012, "step": 10464 }, { "epoch": 2.46, "learning_rate": 1.672302597760701e-06, "loss": 0.0079, "step": 10465 }, { "epoch": 2.46, "learning_rate": 1.6708997940834004e-06, "loss": 0.0007, "step": 10466 }, { "epoch": 2.46, "learning_rate": 1.6694975253825841e-06, "loss": 0.0006, "step": 10467 }, { "epoch": 2.46, "learning_rate": 1.6680957917483265e-06, "loss": 0.0006, "step": 10468 }, { "epoch": 2.46, "learning_rate": 1.6666945932706557e-06, "loss": 0.0085, "step": 10469 }, { "epoch": 2.46, "learning_rate": 1.665293930039572e-06, "loss": 0.0016, "step": 10470 }, { "epoch": 2.46, "learning_rate": 1.6638938021450368e-06, "loss": 0.016, "step": 10471 }, { "epoch": 2.46, "learning_rate": 1.6624942096769837e-06, "loss": 0.0024, "step": 10472 }, { "epoch": 2.46, "learning_rate": 1.6610951527253106e-06, "loss": 0.0085, "step": 10473 }, { "epoch": 2.46, "learning_rate": 1.6596966313798713e-06, "loss": 0.0112, "step": 10474 }, { "epoch": 2.46, "learning_rate": 1.6582986457304983e-06, "loss": 0.0068, "step": 10475 }, { "epoch": 2.46, "learning_rate": 1.6569011958669801e-06, "loss": 0.0398, "step": 10476 }, { "epoch": 2.46, "learning_rate": 1.6555042818790811e-06, "loss": 0.0566, "step": 10477 }, { "epoch": 2.46, "learning_rate": 1.6541079038565156e-06, "loss": 0.001, "step": 10478 }, { "epoch": 2.46, "learning_rate": 1.6527120618889802e-06, "loss": 0.0014, "step": 10479 }, { "epoch": 2.46, "learning_rate": 1.6513167560661236e-06, "loss": 0.0109, "step": 10480 }, { "epoch": 2.46, "learning_rate": 1.6499219864775718e-06, "loss": 0.0367, "step": 10481 }, { "epoch": 2.46, "learning_rate": 1.6485277532129085e-06, "loss": 0.0002, "step": 10482 }, { "epoch": 2.46, "learning_rate": 1.6471340563616822e-06, "loss": 0.054, "step": 10483 }, { "epoch": 2.46, "learning_rate": 1.6457408960134148e-06, "loss": 0.0003, "step": 10484 }, { "epoch": 2.46, "learning_rate": 1.6443482722575853e-06, "loss": 0.0027, "step": 10485 }, { "epoch": 2.46, "learning_rate": 1.6429561851836418e-06, "loss": 0.0085, "step": 10486 }, { "epoch": 2.46, "learning_rate": 1.6415646348809966e-06, "loss": 0.021, "step": 10487 }, { "epoch": 2.46, "learning_rate": 1.6401736214390306e-06, "loss": 0.0088, "step": 10488 }, { "epoch": 2.46, "learning_rate": 1.6387831449470937e-06, "loss": 0.0449, "step": 10489 }, { "epoch": 2.46, "learning_rate": 1.6373932054944851e-06, "loss": 0.034, "step": 10490 }, { "epoch": 2.46, "learning_rate": 1.6360038031704872e-06, "loss": 0.0033, "step": 10491 }, { "epoch": 2.46, "learning_rate": 1.6346149380643383e-06, "loss": 0.0031, "step": 10492 }, { "epoch": 2.46, "learning_rate": 1.6332266102652505e-06, "loss": 0.0642, "step": 10493 }, { "epoch": 2.46, "learning_rate": 1.631838819862387e-06, "loss": 0.0031, "step": 10494 }, { "epoch": 2.46, "learning_rate": 1.6304515669448917e-06, "loss": 0.0221, "step": 10495 }, { "epoch": 2.46, "learning_rate": 1.6290648516018625e-06, "loss": 0.0353, "step": 10496 }, { "epoch": 2.46, "learning_rate": 1.627678673922375e-06, "loss": 0.026, "step": 10497 }, { "epoch": 2.46, "learning_rate": 1.6262930339954586e-06, "loss": 0.0043, "step": 10498 }, { "epoch": 2.46, "learning_rate": 1.6249079319101102e-06, "loss": 0.0139, "step": 10499 }, { "epoch": 2.46, "learning_rate": 1.623523367755301e-06, "loss": 0.0011, "step": 10500 }, { "epoch": 2.46, "learning_rate": 1.6221393416199572e-06, "loss": 0.0413, "step": 10501 }, { "epoch": 2.46, "learning_rate": 1.6207558535929756e-06, "loss": 0.0049, "step": 10502 }, { "epoch": 2.46, "learning_rate": 1.6193729037632143e-06, "loss": 0.0034, "step": 10503 }, { "epoch": 2.47, "learning_rate": 1.6179904922195054e-06, "loss": 0.0147, "step": 10504 }, { "epoch": 2.47, "learning_rate": 1.6166086190506381e-06, "loss": 0.0036, "step": 10505 }, { "epoch": 2.47, "learning_rate": 1.6152272843453676e-06, "loss": 0.0332, "step": 10506 }, { "epoch": 2.47, "learning_rate": 1.6138464881924221e-06, "loss": 0.0559, "step": 10507 }, { "epoch": 2.47, "learning_rate": 1.6124662306804829e-06, "loss": 0.0052, "step": 10508 }, { "epoch": 2.47, "learning_rate": 1.6110865118982132e-06, "loss": 0.0001, "step": 10509 }, { "epoch": 2.47, "learning_rate": 1.6097073319342227e-06, "loss": 0.0002, "step": 10510 }, { "epoch": 2.47, "learning_rate": 1.6083286908771011e-06, "loss": 0.0108, "step": 10511 }, { "epoch": 2.47, "learning_rate": 1.6069505888153937e-06, "loss": 0.0003, "step": 10512 }, { "epoch": 2.47, "learning_rate": 1.6055730258376245e-06, "loss": 0.0193, "step": 10513 }, { "epoch": 2.47, "learning_rate": 1.6041960020322634e-06, "loss": 0.0349, "step": 10514 }, { "epoch": 2.47, "learning_rate": 1.6028195174877615e-06, "loss": 0.0007, "step": 10515 }, { "epoch": 2.47, "learning_rate": 1.6014435722925325e-06, "loss": 0.0184, "step": 10516 }, { "epoch": 2.47, "learning_rate": 1.6000681665349505e-06, "loss": 0.0529, "step": 10517 }, { "epoch": 2.47, "learning_rate": 1.5986933003033577e-06, "loss": 0.0114, "step": 10518 }, { "epoch": 2.47, "learning_rate": 1.5973189736860595e-06, "loss": 0.0009, "step": 10519 }, { "epoch": 2.47, "learning_rate": 1.5959451867713326e-06, "loss": 0.0171, "step": 10520 }, { "epoch": 2.47, "learning_rate": 1.5945719396474124e-06, "loss": 0.0017, "step": 10521 }, { "epoch": 2.47, "learning_rate": 1.5931992324025037e-06, "loss": 0.0102, "step": 10522 }, { "epoch": 2.47, "learning_rate": 1.5918270651247724e-06, "loss": 0.0013, "step": 10523 }, { "epoch": 2.47, "learning_rate": 1.590455437902354e-06, "loss": 0.0003, "step": 10524 }, { "epoch": 2.47, "learning_rate": 1.5890843508233532e-06, "loss": 0.0061, "step": 10525 }, { "epoch": 2.47, "learning_rate": 1.5877138039758256e-06, "loss": 0.0018, "step": 10526 }, { "epoch": 2.47, "learning_rate": 1.586343797447807e-06, "loss": 0.0063, "step": 10527 }, { "epoch": 2.47, "learning_rate": 1.5849743313272891e-06, "loss": 0.0194, "step": 10528 }, { "epoch": 2.47, "learning_rate": 1.5836054057022398e-06, "loss": 0.0027, "step": 10529 }, { "epoch": 2.47, "learning_rate": 1.5822370206605742e-06, "loss": 0.0028, "step": 10530 }, { "epoch": 2.47, "learning_rate": 1.5808691762901896e-06, "loss": 0.0211, "step": 10531 }, { "epoch": 2.47, "learning_rate": 1.5795018726789436e-06, "loss": 0.0113, "step": 10532 }, { "epoch": 2.47, "learning_rate": 1.5781351099146547e-06, "loss": 0.0019, "step": 10533 }, { "epoch": 2.47, "learning_rate": 1.576768888085113e-06, "loss": 0.0032, "step": 10534 }, { "epoch": 2.47, "learning_rate": 1.5754032072780645e-06, "loss": 0.0003, "step": 10535 }, { "epoch": 2.47, "learning_rate": 1.5740380675812338e-06, "loss": 0.0056, "step": 10536 }, { "epoch": 2.47, "learning_rate": 1.5726734690822998e-06, "loss": 0.0273, "step": 10537 }, { "epoch": 2.47, "learning_rate": 1.571309411868912e-06, "loss": 0.014, "step": 10538 }, { "epoch": 2.47, "learning_rate": 1.5699458960286795e-06, "loss": 0.0053, "step": 10539 }, { "epoch": 2.47, "learning_rate": 1.5685829216491844e-06, "loss": 0.0047, "step": 10540 }, { "epoch": 2.47, "learning_rate": 1.5672204888179742e-06, "loss": 0.0522, "step": 10541 }, { "epoch": 2.47, "learning_rate": 1.5658585976225482e-06, "loss": 0.001, "step": 10542 }, { "epoch": 2.47, "learning_rate": 1.5644972481503873e-06, "loss": 0.0235, "step": 10543 }, { "epoch": 2.47, "learning_rate": 1.563136440488927e-06, "loss": 0.0039, "step": 10544 }, { "epoch": 2.47, "learning_rate": 1.5617761747255777e-06, "loss": 0.0013, "step": 10545 }, { "epoch": 2.48, "learning_rate": 1.5604164509477004e-06, "loss": 0.0009, "step": 10546 }, { "epoch": 2.48, "learning_rate": 1.5590572692426376e-06, "loss": 0.0345, "step": 10547 }, { "epoch": 2.48, "learning_rate": 1.5576986296976815e-06, "loss": 0.0001, "step": 10548 }, { "epoch": 2.48, "learning_rate": 1.5563405324001056e-06, "loss": 0.0021, "step": 10549 }, { "epoch": 2.48, "learning_rate": 1.554982977437136e-06, "loss": 0.0013, "step": 10550 }, { "epoch": 2.48, "learning_rate": 1.553625964895965e-06, "loss": 0.0022, "step": 10551 }, { "epoch": 2.48, "learning_rate": 1.5522694948637606e-06, "loss": 0.0954, "step": 10552 }, { "epoch": 2.48, "learning_rate": 1.5509135674276443e-06, "loss": 0.0027, "step": 10553 }, { "epoch": 2.48, "learning_rate": 1.5495581826747076e-06, "loss": 0.0092, "step": 10554 }, { "epoch": 2.48, "learning_rate": 1.5482033406920038e-06, "loss": 0.0057, "step": 10555 }, { "epoch": 2.48, "learning_rate": 1.5468490415665595e-06, "loss": 0.0067, "step": 10556 }, { "epoch": 2.48, "learning_rate": 1.5454952853853579e-06, "loss": 0.0009, "step": 10557 }, { "epoch": 2.48, "learning_rate": 1.5441420722353495e-06, "loss": 0.0012, "step": 10558 }, { "epoch": 2.48, "learning_rate": 1.5427894022034539e-06, "loss": 0.0185, "step": 10559 }, { "epoch": 2.48, "learning_rate": 1.5414372753765493e-06, "loss": 0.0064, "step": 10560 }, { "epoch": 2.48, "learning_rate": 1.5400856918414897e-06, "loss": 0.001, "step": 10561 }, { "epoch": 2.48, "learning_rate": 1.5387346516850764e-06, "loss": 0.0399, "step": 10562 }, { "epoch": 2.48, "learning_rate": 1.537384154994096e-06, "loss": 0.004, "step": 10563 }, { "epoch": 2.48, "learning_rate": 1.5360342018552831e-06, "loss": 0.0162, "step": 10564 }, { "epoch": 2.48, "learning_rate": 1.5346847923553532e-06, "loss": 0.0002, "step": 10565 }, { "epoch": 2.48, "learning_rate": 1.5333359265809688e-06, "loss": 0.0054, "step": 10566 }, { "epoch": 2.48, "learning_rate": 1.531987604618773e-06, "loss": 0.0024, "step": 10567 }, { "epoch": 2.48, "learning_rate": 1.5306398265553701e-06, "loss": 0.032, "step": 10568 }, { "epoch": 2.48, "learning_rate": 1.529292592477325e-06, "loss": 0.0523, "step": 10569 }, { "epoch": 2.48, "learning_rate": 1.5279459024711706e-06, "loss": 0.0538, "step": 10570 }, { "epoch": 2.48, "learning_rate": 1.5265997566234015e-06, "loss": 0.0054, "step": 10571 }, { "epoch": 2.48, "learning_rate": 1.5252541550204858e-06, "loss": 0.0971, "step": 10572 }, { "epoch": 2.48, "learning_rate": 1.5239090977488491e-06, "loss": 0.0007, "step": 10573 }, { "epoch": 2.48, "learning_rate": 1.522564584894881e-06, "loss": 0.0021, "step": 10574 }, { "epoch": 2.48, "learning_rate": 1.521220616544944e-06, "loss": 0.0108, "step": 10575 }, { "epoch": 2.48, "learning_rate": 1.5198771927853572e-06, "loss": 0.001, "step": 10576 }, { "epoch": 2.48, "learning_rate": 1.5185343137024156e-06, "loss": 0.0351, "step": 10577 }, { "epoch": 2.48, "learning_rate": 1.517191979382361e-06, "loss": 0.0039, "step": 10578 }, { "epoch": 2.48, "learning_rate": 1.5158501899114197e-06, "loss": 0.0313, "step": 10579 }, { "epoch": 2.48, "learning_rate": 1.5145089453757688e-06, "loss": 0.0489, "step": 10580 }, { "epoch": 2.48, "learning_rate": 1.513168245861565e-06, "loss": 0.0156, "step": 10581 }, { "epoch": 2.48, "learning_rate": 1.5118280914549099e-06, "loss": 0.0486, "step": 10582 }, { "epoch": 2.48, "learning_rate": 1.5104884822418875e-06, "loss": 0.0373, "step": 10583 }, { "epoch": 2.48, "learning_rate": 1.5091494183085408e-06, "loss": 0.0142, "step": 10584 }, { "epoch": 2.48, "learning_rate": 1.507810899740878e-06, "loss": 0.0038, "step": 10585 }, { "epoch": 2.48, "learning_rate": 1.5064729266248712e-06, "loss": 0.0089, "step": 10586 }, { "epoch": 2.48, "learning_rate": 1.505135499046455e-06, "loss": 0.0073, "step": 10587 }, { "epoch": 2.48, "learning_rate": 1.5037986170915364e-06, "loss": 0.0029, "step": 10588 }, { "epoch": 2.49, "learning_rate": 1.5024622808459822e-06, "loss": 0.0365, "step": 10589 }, { "epoch": 2.49, "learning_rate": 1.5011264903956235e-06, "loss": 0.0142, "step": 10590 }, { "epoch": 2.49, "learning_rate": 1.4997912458262564e-06, "loss": 0.0171, "step": 10591 }, { "epoch": 2.49, "learning_rate": 1.4984565472236478e-06, "loss": 0.0216, "step": 10592 }, { "epoch": 2.49, "learning_rate": 1.4971223946735237e-06, "loss": 0.0065, "step": 10593 }, { "epoch": 2.49, "learning_rate": 1.495788788261573e-06, "loss": 0.0002, "step": 10594 }, { "epoch": 2.49, "learning_rate": 1.4944557280734572e-06, "loss": 0.0098, "step": 10595 }, { "epoch": 2.49, "learning_rate": 1.493123214194796e-06, "loss": 0.0234, "step": 10596 }, { "epoch": 2.49, "learning_rate": 1.4917912467111816e-06, "loss": 0.0123, "step": 10597 }, { "epoch": 2.49, "learning_rate": 1.490459825708157e-06, "loss": 0.0008, "step": 10598 }, { "epoch": 2.49, "learning_rate": 1.489128951271246e-06, "loss": 0.0008, "step": 10599 }, { "epoch": 2.49, "learning_rate": 1.4877986234859264e-06, "loss": 0.0218, "step": 10600 }, { "epoch": 2.49, "learning_rate": 1.4864688424376494e-06, "loss": 0.0112, "step": 10601 }, { "epoch": 2.49, "learning_rate": 1.4851396082118242e-06, "loss": 0.0105, "step": 10602 }, { "epoch": 2.49, "learning_rate": 1.4838109208938244e-06, "loss": 0.009, "step": 10603 }, { "epoch": 2.49, "learning_rate": 1.4824827805689967e-06, "loss": 0.0157, "step": 10604 }, { "epoch": 2.49, "learning_rate": 1.4811551873226449e-06, "loss": 0.0484, "step": 10605 }, { "epoch": 2.49, "learning_rate": 1.4798281412400395e-06, "loss": 0.0042, "step": 10606 }, { "epoch": 2.49, "learning_rate": 1.4785016424064135e-06, "loss": 0.014, "step": 10607 }, { "epoch": 2.49, "learning_rate": 1.4771756909069746e-06, "loss": 0.0002, "step": 10608 }, { "epoch": 2.49, "learning_rate": 1.4758502868268842e-06, "loss": 0.0015, "step": 10609 }, { "epoch": 2.49, "learning_rate": 1.4745254302512702e-06, "loss": 0.0146, "step": 10610 }, { "epoch": 2.49, "learning_rate": 1.4732011212652343e-06, "loss": 0.0004, "step": 10611 }, { "epoch": 2.49, "learning_rate": 1.4718773599538304e-06, "loss": 0.0042, "step": 10612 }, { "epoch": 2.49, "learning_rate": 1.4705541464020913e-06, "loss": 0.0026, "step": 10613 }, { "epoch": 2.49, "learning_rate": 1.469231480694997e-06, "loss": 0.0006, "step": 10614 }, { "epoch": 2.49, "learning_rate": 1.4679093629175089e-06, "loss": 0.0088, "step": 10615 }, { "epoch": 2.49, "learning_rate": 1.4665877931545413e-06, "loss": 0.0035, "step": 10616 }, { "epoch": 2.49, "learning_rate": 1.4652667714909842e-06, "loss": 0.0022, "step": 10617 }, { "epoch": 2.49, "learning_rate": 1.4639462980116836e-06, "loss": 0.0004, "step": 10618 }, { "epoch": 2.49, "learning_rate": 1.462626372801451e-06, "loss": 0.0315, "step": 10619 }, { "epoch": 2.49, "learning_rate": 1.4613069959450698e-06, "loss": 0.0151, "step": 10620 }, { "epoch": 2.49, "learning_rate": 1.459988167527281e-06, "loss": 0.0045, "step": 10621 }, { "epoch": 2.49, "learning_rate": 1.458669887632792e-06, "loss": 0.0046, "step": 10622 }, { "epoch": 2.49, "learning_rate": 1.4573521563462744e-06, "loss": 0.002, "step": 10623 }, { "epoch": 2.49, "learning_rate": 1.4560349737523694e-06, "loss": 0.0143, "step": 10624 }, { "epoch": 2.49, "learning_rate": 1.4547183399356779e-06, "loss": 0.0026, "step": 10625 }, { "epoch": 2.49, "learning_rate": 1.4534022549807636e-06, "loss": 0.0181, "step": 10626 }, { "epoch": 2.49, "learning_rate": 1.4520867189721644e-06, "loss": 0.0008, "step": 10627 }, { "epoch": 2.49, "learning_rate": 1.4507717319943737e-06, "loss": 0.0099, "step": 10628 }, { "epoch": 2.49, "learning_rate": 1.449457294131853e-06, "loss": 0.024, "step": 10629 }, { "epoch": 2.49, "learning_rate": 1.4481434054690269e-06, "loss": 0.02, "step": 10630 }, { "epoch": 2.5, "learning_rate": 1.4468300660902902e-06, "loss": 0.0003, "step": 10631 }, { "epoch": 2.5, "learning_rate": 1.445517276079993e-06, "loss": 0.0008, "step": 10632 }, { "epoch": 2.5, "learning_rate": 1.4442050355224635e-06, "loss": 0.0018, "step": 10633 }, { "epoch": 2.5, "learning_rate": 1.4428933445019777e-06, "loss": 0.0127, "step": 10634 }, { "epoch": 2.5, "learning_rate": 1.441582203102788e-06, "loss": 0.0308, "step": 10635 }, { "epoch": 2.5, "learning_rate": 1.4402716114091142e-06, "loss": 0.0004, "step": 10636 }, { "epoch": 2.5, "learning_rate": 1.4389615695051295e-06, "loss": 0.0277, "step": 10637 }, { "epoch": 2.5, "learning_rate": 1.4376520774749802e-06, "loss": 0.0529, "step": 10638 }, { "epoch": 2.5, "learning_rate": 1.4363431354027724e-06, "loss": 0.0182, "step": 10639 }, { "epoch": 2.5, "learning_rate": 1.435034743372582e-06, "loss": 0.0202, "step": 10640 }, { "epoch": 2.5, "learning_rate": 1.4337269014684452e-06, "loss": 0.0002, "step": 10641 }, { "epoch": 2.5, "learning_rate": 1.432419609774366e-06, "loss": 0.02, "step": 10642 }, { "epoch": 2.5, "learning_rate": 1.4311128683743069e-06, "loss": 0.0087, "step": 10643 }, { "epoch": 2.5, "learning_rate": 1.4298066773522046e-06, "loss": 0.0059, "step": 10644 }, { "epoch": 2.5, "learning_rate": 1.428501036791955e-06, "loss": 0.0432, "step": 10645 }, { "epoch": 2.5, "learning_rate": 1.4271959467774155e-06, "loss": 0.005, "step": 10646 }, { "epoch": 2.5, "learning_rate": 1.4258914073924158e-06, "loss": 0.0026, "step": 10647 }, { "epoch": 2.5, "learning_rate": 1.424587418720743e-06, "loss": 0.0353, "step": 10648 }, { "epoch": 2.5, "learning_rate": 1.4232839808461585e-06, "loss": 0.0433, "step": 10649 }, { "epoch": 2.5, "learning_rate": 1.4219810938523727e-06, "loss": 0.0061, "step": 10650 }, { "epoch": 2.5, "learning_rate": 1.4206787578230752e-06, "loss": 0.0123, "step": 10651 }, { "epoch": 2.5, "learning_rate": 1.4193769728419127e-06, "loss": 0.0205, "step": 10652 }, { "epoch": 2.5, "learning_rate": 1.4180757389925004e-06, "loss": 0.0019, "step": 10653 }, { "epoch": 2.5, "learning_rate": 1.416775056358417e-06, "loss": 0.0052, "step": 10654 }, { "epoch": 2.5, "learning_rate": 1.4154749250232003e-06, "loss": 0.0338, "step": 10655 }, { "epoch": 2.5, "learning_rate": 1.4141753450703643e-06, "loss": 0.0005, "step": 10656 }, { "epoch": 2.5, "learning_rate": 1.4128763165833759e-06, "loss": 0.0254, "step": 10657 }, { "epoch": 2.5, "learning_rate": 1.4115778396456737e-06, "loss": 0.0004, "step": 10658 }, { "epoch": 2.5, "learning_rate": 1.4102799143406553e-06, "loss": 0.0006, "step": 10659 }, { "epoch": 2.5, "learning_rate": 1.408982540751691e-06, "loss": 0.0084, "step": 10660 }, { "epoch": 2.5, "learning_rate": 1.4076857189621074e-06, "loss": 0.0461, "step": 10661 }, { "epoch": 2.5, "learning_rate": 1.4063894490551988e-06, "loss": 0.0027, "step": 10662 }, { "epoch": 2.5, "learning_rate": 1.4050937311142287e-06, "loss": 0.0005, "step": 10663 }, { "epoch": 2.5, "learning_rate": 1.4037985652224162e-06, "loss": 0.0062, "step": 10664 }, { "epoch": 2.5, "learning_rate": 1.4025039514629524e-06, "loss": 0.0001, "step": 10665 }, { "epoch": 2.5, "learning_rate": 1.4012098899189863e-06, "loss": 0.0012, "step": 10666 }, { "epoch": 2.5, "learning_rate": 1.3999163806736405e-06, "loss": 0.0017, "step": 10667 }, { "epoch": 2.5, "learning_rate": 1.3986234238099916e-06, "loss": 0.005, "step": 10668 }, { "epoch": 2.5, "learning_rate": 1.3973310194110913e-06, "loss": 0.0053, "step": 10669 }, { "epoch": 2.5, "learning_rate": 1.3960391675599472e-06, "loss": 0.0123, "step": 10670 }, { "epoch": 2.5, "learning_rate": 1.394747868339532e-06, "loss": 0.0064, "step": 10671 }, { "epoch": 2.5, "learning_rate": 1.3934571218327931e-06, "loss": 0.0003, "step": 10672 }, { "epoch": 2.5, "learning_rate": 1.392166928122629e-06, "loss": 0.0015, "step": 10673 }, { "epoch": 2.51, "learning_rate": 1.3908772872919108e-06, "loss": 0.001, "step": 10674 }, { "epoch": 2.51, "learning_rate": 1.3895881994234695e-06, "loss": 0.0023, "step": 10675 }, { "epoch": 2.51, "learning_rate": 1.3882996646001056e-06, "loss": 0.0061, "step": 10676 }, { "epoch": 2.51, "learning_rate": 1.387011682904582e-06, "loss": 0.0702, "step": 10677 }, { "epoch": 2.51, "learning_rate": 1.3857242544196214e-06, "loss": 0.0007, "step": 10678 }, { "epoch": 2.51, "learning_rate": 1.3844373792279198e-06, "loss": 0.001, "step": 10679 }, { "epoch": 2.51, "learning_rate": 1.383151057412131e-06, "loss": 0.0017, "step": 10680 }, { "epoch": 2.51, "learning_rate": 1.3818652890548757e-06, "loss": 0.0009, "step": 10681 }, { "epoch": 2.51, "learning_rate": 1.3805800742387355e-06, "loss": 0.0354, "step": 10682 }, { "epoch": 2.51, "learning_rate": 1.3792954130462633e-06, "loss": 0.0035, "step": 10683 }, { "epoch": 2.51, "learning_rate": 1.3780113055599697e-06, "loss": 0.0039, "step": 10684 }, { "epoch": 2.51, "learning_rate": 1.376727751862339e-06, "loss": 0.0168, "step": 10685 }, { "epoch": 2.51, "learning_rate": 1.375444752035805e-06, "loss": 0.0011, "step": 10686 }, { "epoch": 2.51, "learning_rate": 1.3741623061627773e-06, "loss": 0.0004, "step": 10687 }, { "epoch": 2.51, "learning_rate": 1.3728804143256313e-06, "loss": 0.0032, "step": 10688 }, { "epoch": 2.51, "learning_rate": 1.371599076606699e-06, "loss": 0.0016, "step": 10689 }, { "epoch": 2.51, "learning_rate": 1.3703182930882807e-06, "loss": 0.0009, "step": 10690 }, { "epoch": 2.51, "learning_rate": 1.3690380638526401e-06, "loss": 0.0013, "step": 10691 }, { "epoch": 2.51, "learning_rate": 1.367758388982009e-06, "loss": 0.0093, "step": 10692 }, { "epoch": 2.51, "learning_rate": 1.3664792685585792e-06, "loss": 0.0174, "step": 10693 }, { "epoch": 2.51, "learning_rate": 1.3652007026645053e-06, "loss": 0.0143, "step": 10694 }, { "epoch": 2.51, "learning_rate": 1.363922691381915e-06, "loss": 0.0024, "step": 10695 }, { "epoch": 2.51, "learning_rate": 1.362645234792892e-06, "loss": 0.0053, "step": 10696 }, { "epoch": 2.51, "learning_rate": 1.361368332979487e-06, "loss": 0.0113, "step": 10697 }, { "epoch": 2.51, "learning_rate": 1.3600919860237117e-06, "loss": 0.0079, "step": 10698 }, { "epoch": 2.51, "learning_rate": 1.3588161940075516e-06, "loss": 0.0073, "step": 10699 }, { "epoch": 2.51, "learning_rate": 1.3575409570129483e-06, "loss": 0.0171, "step": 10700 }, { "epoch": 2.51, "learning_rate": 1.3562662751218103e-06, "loss": 0.0053, "step": 10701 }, { "epoch": 2.51, "learning_rate": 1.3549921484160067e-06, "loss": 0.0548, "step": 10702 }, { "epoch": 2.51, "learning_rate": 1.3537185769773774e-06, "loss": 0.0018, "step": 10703 }, { "epoch": 2.51, "learning_rate": 1.352445560887725e-06, "loss": 0.0195, "step": 10704 }, { "epoch": 2.51, "learning_rate": 1.3511731002288142e-06, "loss": 0.0008, "step": 10705 }, { "epoch": 2.51, "learning_rate": 1.3499011950823748e-06, "loss": 0.0091, "step": 10706 }, { "epoch": 2.51, "learning_rate": 1.3486298455300983e-06, "loss": 0.009, "step": 10707 }, { "epoch": 2.51, "learning_rate": 1.3473590516536473e-06, "loss": 0.0013, "step": 10708 }, { "epoch": 2.51, "learning_rate": 1.3460888135346428e-06, "loss": 0.0015, "step": 10709 }, { "epoch": 2.51, "learning_rate": 1.3448191312546722e-06, "loss": 0.0004, "step": 10710 }, { "epoch": 2.51, "learning_rate": 1.3435500048952832e-06, "loss": 0.0085, "step": 10711 }, { "epoch": 2.51, "learning_rate": 1.3422814345379987e-06, "loss": 0.0165, "step": 10712 }, { "epoch": 2.51, "learning_rate": 1.3410134202642943e-06, "loss": 0.004, "step": 10713 }, { "epoch": 2.51, "learning_rate": 1.339745962155613e-06, "loss": 0.0184, "step": 10714 }, { "epoch": 2.51, "learning_rate": 1.3384790602933684e-06, "loss": 0.0249, "step": 10715 }, { "epoch": 2.51, "learning_rate": 1.3372127147589297e-06, "loss": 0.0099, "step": 10716 }, { "epoch": 2.52, "learning_rate": 1.3359469256336356e-06, "loss": 0.0005, "step": 10717 }, { "epoch": 2.52, "learning_rate": 1.3346816929987827e-06, "loss": 0.0031, "step": 10718 }, { "epoch": 2.52, "learning_rate": 1.3334170169356441e-06, "loss": 0.0012, "step": 10719 }, { "epoch": 2.52, "learning_rate": 1.3321528975254461e-06, "loss": 0.0037, "step": 10720 }, { "epoch": 2.52, "learning_rate": 1.3308893348493802e-06, "loss": 0.0026, "step": 10721 }, { "epoch": 2.52, "learning_rate": 1.3296263289886102e-06, "loss": 0.0202, "step": 10722 }, { "epoch": 2.52, "learning_rate": 1.3283638800242538e-06, "loss": 0.0774, "step": 10723 }, { "epoch": 2.52, "learning_rate": 1.3271019880374026e-06, "loss": 0.001, "step": 10724 }, { "epoch": 2.52, "learning_rate": 1.3258406531091038e-06, "loss": 0.0251, "step": 10725 }, { "epoch": 2.52, "learning_rate": 1.3245798753203753e-06, "loss": 0.0002, "step": 10726 }, { "epoch": 2.52, "learning_rate": 1.3233196547521932e-06, "loss": 0.029, "step": 10727 }, { "epoch": 2.52, "learning_rate": 1.322059991485506e-06, "loss": 0.0061, "step": 10728 }, { "epoch": 2.52, "learning_rate": 1.3208008856012177e-06, "loss": 0.0094, "step": 10729 }, { "epoch": 2.52, "learning_rate": 1.3195423371801997e-06, "loss": 0.0645, "step": 10730 }, { "epoch": 2.52, "learning_rate": 1.3182843463032934e-06, "loss": 0.0001, "step": 10731 }, { "epoch": 2.52, "learning_rate": 1.3170269130512958e-06, "loss": 0.0002, "step": 10732 }, { "epoch": 2.52, "learning_rate": 1.315770037504972e-06, "loss": 0.0001, "step": 10733 }, { "epoch": 2.52, "learning_rate": 1.3145137197450497e-06, "loss": 0.0011, "step": 10734 }, { "epoch": 2.52, "learning_rate": 1.3132579598522245e-06, "loss": 0.0002, "step": 10735 }, { "epoch": 2.52, "learning_rate": 1.3120027579071516e-06, "loss": 0.0474, "step": 10736 }, { "epoch": 2.52, "learning_rate": 1.310748113990452e-06, "loss": 0.0221, "step": 10737 }, { "epoch": 2.52, "learning_rate": 1.309494028182714e-06, "loss": 0.0058, "step": 10738 }, { "epoch": 2.52, "learning_rate": 1.308240500564484e-06, "loss": 0.0192, "step": 10739 }, { "epoch": 2.52, "learning_rate": 1.3069875312162794e-06, "loss": 0.0076, "step": 10740 }, { "epoch": 2.52, "learning_rate": 1.3057351202185753e-06, "loss": 0.0001, "step": 10741 }, { "epoch": 2.52, "learning_rate": 1.304483267651816e-06, "loss": 0.0002, "step": 10742 }, { "epoch": 2.52, "learning_rate": 1.3032319735964039e-06, "loss": 0.0318, "step": 10743 }, { "epoch": 2.52, "learning_rate": 1.3019812381327134e-06, "loss": 0.001, "step": 10744 }, { "epoch": 2.52, "learning_rate": 1.3007310613410784e-06, "loss": 0.0003, "step": 10745 }, { "epoch": 2.52, "learning_rate": 1.2994814433017944e-06, "loss": 0.0177, "step": 10746 }, { "epoch": 2.52, "learning_rate": 1.2982323840951282e-06, "loss": 0.001, "step": 10747 }, { "epoch": 2.52, "learning_rate": 1.296983883801305e-06, "loss": 0.1068, "step": 10748 }, { "epoch": 2.52, "learning_rate": 1.2957359425005157e-06, "loss": 0.0032, "step": 10749 }, { "epoch": 2.52, "learning_rate": 1.294488560272913e-06, "loss": 0.0033, "step": 10750 }, { "epoch": 2.52, "learning_rate": 1.2932417371986195e-06, "loss": 0.0002, "step": 10751 }, { "epoch": 2.52, "learning_rate": 1.2919954733577178e-06, "loss": 0.0196, "step": 10752 }, { "epoch": 2.52, "learning_rate": 1.290749768830255e-06, "loss": 0.0008, "step": 10753 }, { "epoch": 2.52, "learning_rate": 1.2895046236962393e-06, "loss": 0.0125, "step": 10754 }, { "epoch": 2.52, "learning_rate": 1.2882600380356481e-06, "loss": 0.0324, "step": 10755 }, { "epoch": 2.52, "learning_rate": 1.287016011928427e-06, "loss": 0.0044, "step": 10756 }, { "epoch": 2.52, "learning_rate": 1.285772545454469e-06, "loss": 0.0025, "step": 10757 }, { "epoch": 2.52, "learning_rate": 1.284529638693649e-06, "loss": 0.0091, "step": 10758 }, { "epoch": 2.53, "learning_rate": 1.2832872917257954e-06, "loss": 0.004, "step": 10759 }, { "epoch": 2.53, "learning_rate": 1.2820455046307057e-06, "loss": 0.0951, "step": 10760 }, { "epoch": 2.53, "learning_rate": 1.2808042774881403e-06, "loss": 0.0108, "step": 10761 }, { "epoch": 2.53, "learning_rate": 1.2795636103778209e-06, "loss": 0.003, "step": 10762 }, { "epoch": 2.53, "learning_rate": 1.2783235033794338e-06, "loss": 0.0045, "step": 10763 }, { "epoch": 2.53, "learning_rate": 1.2770839565726346e-06, "loss": 0.0011, "step": 10764 }, { "epoch": 2.53, "learning_rate": 1.275844970037039e-06, "loss": 0.0488, "step": 10765 }, { "epoch": 2.53, "learning_rate": 1.2746065438522227e-06, "loss": 0.0007, "step": 10766 }, { "epoch": 2.53, "learning_rate": 1.2733686780977339e-06, "loss": 0.0249, "step": 10767 }, { "epoch": 2.53, "learning_rate": 1.2721313728530792e-06, "loss": 0.0474, "step": 10768 }, { "epoch": 2.53, "learning_rate": 1.2708946281977307e-06, "loss": 0.0051, "step": 10769 }, { "epoch": 2.53, "learning_rate": 1.2696584442111214e-06, "loss": 0.0112, "step": 10770 }, { "epoch": 2.53, "learning_rate": 1.2684228209726558e-06, "loss": 0.0524, "step": 10771 }, { "epoch": 2.53, "learning_rate": 1.2671877585616942e-06, "loss": 0.0031, "step": 10772 }, { "epoch": 2.53, "learning_rate": 1.265953257057565e-06, "loss": 0.0009, "step": 10773 }, { "epoch": 2.53, "learning_rate": 1.2647193165395622e-06, "loss": 0.018, "step": 10774 }, { "epoch": 2.53, "learning_rate": 1.263485937086938e-06, "loss": 0.0583, "step": 10775 }, { "epoch": 2.53, "learning_rate": 1.2622531187789166e-06, "loss": 0.002, "step": 10776 }, { "epoch": 2.53, "learning_rate": 1.2610208616946795e-06, "loss": 0.0335, "step": 10777 }, { "epoch": 2.53, "learning_rate": 1.2597891659133732e-06, "loss": 0.0008, "step": 10778 }, { "epoch": 2.53, "learning_rate": 1.2585580315141077e-06, "loss": 0.0008, "step": 10779 }, { "epoch": 2.53, "learning_rate": 1.2573274585759643e-06, "loss": 0.0035, "step": 10780 }, { "epoch": 2.53, "learning_rate": 1.2560974471779787e-06, "loss": 0.0075, "step": 10781 }, { "epoch": 2.53, "learning_rate": 1.254867997399153e-06, "loss": 0.0139, "step": 10782 }, { "epoch": 2.53, "learning_rate": 1.2536391093184575e-06, "loss": 0.0003, "step": 10783 }, { "epoch": 2.53, "learning_rate": 1.2524107830148223e-06, "loss": 0.0005, "step": 10784 }, { "epoch": 2.53, "learning_rate": 1.2511830185671426e-06, "loss": 0.0086, "step": 10785 }, { "epoch": 2.53, "learning_rate": 1.2499558160542757e-06, "loss": 0.0064, "step": 10786 }, { "epoch": 2.53, "learning_rate": 1.2487291755550478e-06, "loss": 0.0005, "step": 10787 }, { "epoch": 2.53, "learning_rate": 1.2475030971482437e-06, "loss": 0.0132, "step": 10788 }, { "epoch": 2.53, "learning_rate": 1.2462775809126136e-06, "loss": 0.0048, "step": 10789 }, { "epoch": 2.53, "learning_rate": 1.2450526269268747e-06, "loss": 0.066, "step": 10790 }, { "epoch": 2.53, "learning_rate": 1.2438282352697017e-06, "loss": 0.037, "step": 10791 }, { "epoch": 2.53, "learning_rate": 1.242604406019744e-06, "loss": 0.0708, "step": 10792 }, { "epoch": 2.53, "learning_rate": 1.2413811392555996e-06, "loss": 0.0295, "step": 10793 }, { "epoch": 2.53, "learning_rate": 1.2401584350558437e-06, "loss": 0.004, "step": 10794 }, { "epoch": 2.53, "learning_rate": 1.2389362934990078e-06, "loss": 0.0003, "step": 10795 }, { "epoch": 2.53, "learning_rate": 1.2377147146635925e-06, "loss": 0.0003, "step": 10796 }, { "epoch": 2.53, "learning_rate": 1.2364936986280584e-06, "loss": 0.0604, "step": 10797 }, { "epoch": 2.53, "learning_rate": 1.2352732454708293e-06, "loss": 0.0007, "step": 10798 }, { "epoch": 2.53, "learning_rate": 1.2340533552702972e-06, "loss": 0.0018, "step": 10799 }, { "epoch": 2.53, "learning_rate": 1.2328340281048157e-06, "loss": 0.0019, "step": 10800 }, { "epoch": 2.53, "learning_rate": 1.2316152640527002e-06, "loss": 0.0007, "step": 10801 }, { "epoch": 2.54, "learning_rate": 1.2303970631922303e-06, "loss": 0.016, "step": 10802 }, { "epoch": 2.54, "learning_rate": 1.2291794256016543e-06, "loss": 0.0016, "step": 10803 }, { "epoch": 2.54, "learning_rate": 1.2279623513591787e-06, "loss": 0.0028, "step": 10804 }, { "epoch": 2.54, "learning_rate": 1.2267458405429777e-06, "loss": 0.0115, "step": 10805 }, { "epoch": 2.54, "learning_rate": 1.2255298932311831e-06, "loss": 0.0598, "step": 10806 }, { "epoch": 2.54, "learning_rate": 1.2243145095018993e-06, "loss": 0.0243, "step": 10807 }, { "epoch": 2.54, "learning_rate": 1.2230996894331926e-06, "loss": 0.0004, "step": 10808 }, { "epoch": 2.54, "learning_rate": 1.2218854331030827e-06, "loss": 0.0014, "step": 10809 }, { "epoch": 2.54, "learning_rate": 1.2206717405895686e-06, "loss": 0.0015, "step": 10810 }, { "epoch": 2.54, "learning_rate": 1.2194586119705986e-06, "loss": 0.0214, "step": 10811 }, { "epoch": 2.54, "learning_rate": 1.2182460473240986e-06, "loss": 0.0078, "step": 10812 }, { "epoch": 2.54, "learning_rate": 1.2170340467279484e-06, "loss": 0.009, "step": 10813 }, { "epoch": 2.54, "learning_rate": 1.2158226102599946e-06, "loss": 0.0235, "step": 10814 }, { "epoch": 2.54, "learning_rate": 1.2146117379980438e-06, "loss": 0.0082, "step": 10815 }, { "epoch": 2.54, "learning_rate": 1.2134014300198771e-06, "loss": 0.0088, "step": 10816 }, { "epoch": 2.54, "learning_rate": 1.2121916864032291e-06, "loss": 0.0002, "step": 10817 }, { "epoch": 2.54, "learning_rate": 1.2109825072257985e-06, "loss": 0.0003, "step": 10818 }, { "epoch": 2.54, "learning_rate": 1.2097738925652558e-06, "loss": 0.0027, "step": 10819 }, { "epoch": 2.54, "learning_rate": 1.2085658424992274e-06, "loss": 0.0009, "step": 10820 }, { "epoch": 2.54, "learning_rate": 1.207358357105306e-06, "loss": 0.0017, "step": 10821 }, { "epoch": 2.54, "learning_rate": 1.2061514364610472e-06, "loss": 0.0408, "step": 10822 }, { "epoch": 2.54, "learning_rate": 1.2049450806439722e-06, "loss": 0.019, "step": 10823 }, { "epoch": 2.54, "learning_rate": 1.2037392897315691e-06, "loss": 0.0361, "step": 10824 }, { "epoch": 2.54, "learning_rate": 1.2025340638012783e-06, "loss": 0.005, "step": 10825 }, { "epoch": 2.54, "learning_rate": 1.2013294029305167e-06, "loss": 0.0007, "step": 10826 }, { "epoch": 2.54, "learning_rate": 1.2001253071966556e-06, "loss": 0.0047, "step": 10827 }, { "epoch": 2.54, "learning_rate": 1.19892177667704e-06, "loss": 0.0044, "step": 10828 }, { "epoch": 2.54, "learning_rate": 1.1977188114489636e-06, "loss": 0.0007, "step": 10829 }, { "epoch": 2.54, "learning_rate": 1.1965164115897e-06, "loss": 0.0009, "step": 10830 }, { "epoch": 2.54, "learning_rate": 1.195314577176474e-06, "loss": 0.003, "step": 10831 }, { "epoch": 2.54, "learning_rate": 1.1941133082864832e-06, "loss": 0.0133, "step": 10832 }, { "epoch": 2.54, "learning_rate": 1.1929126049968841e-06, "loss": 0.0083, "step": 10833 }, { "epoch": 2.54, "learning_rate": 1.1917124673847935e-06, "loss": 0.0222, "step": 10834 }, { "epoch": 2.54, "learning_rate": 1.1905128955273027e-06, "loss": 0.0012, "step": 10835 }, { "epoch": 2.54, "learning_rate": 1.1893138895014554e-06, "loss": 0.0052, "step": 10836 }, { "epoch": 2.54, "learning_rate": 1.1881154493842651e-06, "loss": 0.0009, "step": 10837 }, { "epoch": 2.54, "learning_rate": 1.1869175752527052e-06, "loss": 0.0007, "step": 10838 }, { "epoch": 2.54, "learning_rate": 1.1857202671837188e-06, "loss": 0.0067, "step": 10839 }, { "epoch": 2.54, "learning_rate": 1.1845235252542053e-06, "loss": 0.0274, "step": 10840 }, { "epoch": 2.54, "learning_rate": 1.1833273495410312e-06, "loss": 0.0007, "step": 10841 }, { "epoch": 2.54, "learning_rate": 1.1821317401210309e-06, "loss": 0.0383, "step": 10842 }, { "epoch": 2.54, "learning_rate": 1.1809366970709924e-06, "loss": 0.0003, "step": 10843 }, { "epoch": 2.55, "learning_rate": 1.17974222046768e-06, "loss": 0.0389, "step": 10844 }, { "epoch": 2.55, "learning_rate": 1.178548310387806e-06, "loss": 0.0007, "step": 10845 }, { "epoch": 2.55, "learning_rate": 1.177354966908062e-06, "loss": 0.0112, "step": 10846 }, { "epoch": 2.55, "learning_rate": 1.1761621901050924e-06, "loss": 0.0004, "step": 10847 }, { "epoch": 2.55, "learning_rate": 1.1749699800555126e-06, "loss": 0.0255, "step": 10848 }, { "epoch": 2.55, "learning_rate": 1.1737783368358945e-06, "loss": 0.0357, "step": 10849 }, { "epoch": 2.55, "learning_rate": 1.1725872605227773e-06, "loss": 0.0003, "step": 10850 }, { "epoch": 2.55, "learning_rate": 1.171396751192667e-06, "loss": 0.0006, "step": 10851 }, { "epoch": 2.55, "learning_rate": 1.1702068089220275e-06, "loss": 0.0118, "step": 10852 }, { "epoch": 2.55, "learning_rate": 1.1690174337872883e-06, "loss": 0.0003, "step": 10853 }, { "epoch": 2.55, "learning_rate": 1.1678286258648408e-06, "loss": 0.0028, "step": 10854 }, { "epoch": 2.55, "learning_rate": 1.1666403852310471e-06, "loss": 0.0234, "step": 10855 }, { "epoch": 2.55, "learning_rate": 1.165452711962224e-06, "loss": 0.0234, "step": 10856 }, { "epoch": 2.55, "learning_rate": 1.1642656061346568e-06, "loss": 0.0092, "step": 10857 }, { "epoch": 2.55, "learning_rate": 1.1630790678245897e-06, "loss": 0.0069, "step": 10858 }, { "epoch": 2.55, "learning_rate": 1.1618930971082376e-06, "loss": 0.0124, "step": 10859 }, { "epoch": 2.55, "learning_rate": 1.1607076940617778e-06, "loss": 0.0539, "step": 10860 }, { "epoch": 2.55, "learning_rate": 1.1595228587613405e-06, "loss": 0.0083, "step": 10861 }, { "epoch": 2.55, "learning_rate": 1.1583385912830347e-06, "loss": 0.0004, "step": 10862 }, { "epoch": 2.55, "learning_rate": 1.1571548917029197e-06, "loss": 0.0039, "step": 10863 }, { "epoch": 2.55, "learning_rate": 1.1559717600970321e-06, "loss": 0.0002, "step": 10864 }, { "epoch": 2.55, "learning_rate": 1.154789196541356e-06, "loss": 0.0025, "step": 10865 }, { "epoch": 2.55, "learning_rate": 1.1536072011118505e-06, "loss": 0.0061, "step": 10866 }, { "epoch": 2.55, "learning_rate": 1.1524257738844368e-06, "loss": 0.0009, "step": 10867 }, { "epoch": 2.55, "learning_rate": 1.1512449149349958e-06, "loss": 0.0616, "step": 10868 }, { "epoch": 2.55, "learning_rate": 1.1500646243393754e-06, "loss": 0.0161, "step": 10869 }, { "epoch": 2.55, "learning_rate": 1.1488849021733817e-06, "loss": 0.0164, "step": 10870 }, { "epoch": 2.55, "learning_rate": 1.1477057485127918e-06, "loss": 0.0203, "step": 10871 }, { "epoch": 2.55, "learning_rate": 1.146527163433342e-06, "loss": 0.0192, "step": 10872 }, { "epoch": 2.55, "learning_rate": 1.1453491470107325e-06, "loss": 0.0079, "step": 10873 }, { "epoch": 2.55, "learning_rate": 1.144171699320623e-06, "loss": 0.0066, "step": 10874 }, { "epoch": 2.55, "learning_rate": 1.1429948204386455e-06, "loss": 0.0003, "step": 10875 }, { "epoch": 2.55, "learning_rate": 1.1418185104403924e-06, "loss": 0.0003, "step": 10876 }, { "epoch": 2.55, "learning_rate": 1.140642769401411e-06, "loss": 0.0013, "step": 10877 }, { "epoch": 2.55, "learning_rate": 1.1394675973972247e-06, "loss": 0.0004, "step": 10878 }, { "epoch": 2.55, "learning_rate": 1.1382929945033105e-06, "loss": 0.0004, "step": 10879 }, { "epoch": 2.55, "learning_rate": 1.1371189607951194e-06, "loss": 0.0177, "step": 10880 }, { "epoch": 2.55, "learning_rate": 1.135945496348051e-06, "loss": 0.0063, "step": 10881 }, { "epoch": 2.55, "learning_rate": 1.1347726012374826e-06, "loss": 0.0045, "step": 10882 }, { "epoch": 2.55, "learning_rate": 1.1336002755387454e-06, "loss": 0.0193, "step": 10883 }, { "epoch": 2.55, "learning_rate": 1.1324285193271412e-06, "loss": 0.0084, "step": 10884 }, { "epoch": 2.55, "learning_rate": 1.1312573326779286e-06, "loss": 0.029, "step": 10885 }, { "epoch": 2.55, "learning_rate": 1.130086715666333e-06, "loss": 0.0114, "step": 10886 }, { "epoch": 2.56, "learning_rate": 1.1289166683675456e-06, "loss": 0.0039, "step": 10887 }, { "epoch": 2.56, "learning_rate": 1.1277471908567172e-06, "loss": 0.0024, "step": 10888 }, { "epoch": 2.56, "learning_rate": 1.1265782832089622e-06, "loss": 0.0, "step": 10889 }, { "epoch": 2.56, "learning_rate": 1.1254099454993573e-06, "loss": 0.0382, "step": 10890 }, { "epoch": 2.56, "learning_rate": 1.1242421778029489e-06, "loss": 0.0023, "step": 10891 }, { "epoch": 2.56, "learning_rate": 1.1230749801947394e-06, "loss": 0.0453, "step": 10892 }, { "epoch": 2.56, "learning_rate": 1.1219083527496977e-06, "loss": 0.0128, "step": 10893 }, { "epoch": 2.56, "learning_rate": 1.1207422955427594e-06, "loss": 0.0042, "step": 10894 }, { "epoch": 2.56, "learning_rate": 1.1195768086488158e-06, "loss": 0.0051, "step": 10895 }, { "epoch": 2.56, "learning_rate": 1.118411892142731e-06, "loss": 0.0341, "step": 10896 }, { "epoch": 2.56, "learning_rate": 1.1172475460993216e-06, "loss": 0.0008, "step": 10897 }, { "epoch": 2.56, "learning_rate": 1.1160837705933769e-06, "loss": 0.0375, "step": 10898 }, { "epoch": 2.56, "learning_rate": 1.1149205656996431e-06, "loss": 0.0014, "step": 10899 }, { "epoch": 2.56, "learning_rate": 1.113757931492837e-06, "loss": 0.0058, "step": 10900 }, { "epoch": 2.56, "learning_rate": 1.1125958680476323e-06, "loss": 0.0001, "step": 10901 }, { "epoch": 2.56, "learning_rate": 1.1114343754386647e-06, "loss": 0.0178, "step": 10902 }, { "epoch": 2.56, "learning_rate": 1.1102734537405424e-06, "loss": 0.0004, "step": 10903 }, { "epoch": 2.56, "learning_rate": 1.1091131030278279e-06, "loss": 0.003, "step": 10904 }, { "epoch": 2.56, "learning_rate": 1.1079533233750516e-06, "loss": 0.0121, "step": 10905 }, { "epoch": 2.56, "learning_rate": 1.106794114856703e-06, "loss": 0.0273, "step": 10906 }, { "epoch": 2.56, "learning_rate": 1.1056354775472423e-06, "loss": 0.0073, "step": 10907 }, { "epoch": 2.56, "learning_rate": 1.1044774115210854e-06, "loss": 0.0349, "step": 10908 }, { "epoch": 2.56, "learning_rate": 1.1033199168526144e-06, "loss": 0.0028, "step": 10909 }, { "epoch": 2.56, "learning_rate": 1.1021629936161783e-06, "loss": 0.0005, "step": 10910 }, { "epoch": 2.56, "learning_rate": 1.101006641886081e-06, "loss": 0.0001, "step": 10911 }, { "epoch": 2.56, "learning_rate": 1.099850861736602e-06, "loss": 0.0393, "step": 10912 }, { "epoch": 2.56, "learning_rate": 1.0986956532419678e-06, "loss": 0.0016, "step": 10913 }, { "epoch": 2.56, "learning_rate": 1.0975410164763855e-06, "loss": 0.0001, "step": 10914 }, { "epoch": 2.56, "learning_rate": 1.0963869515140103e-06, "loss": 0.0286, "step": 10915 }, { "epoch": 2.56, "learning_rate": 1.0952334584289747e-06, "loss": 0.0131, "step": 10916 }, { "epoch": 2.56, "learning_rate": 1.09408053729536e-06, "loss": 0.0009, "step": 10917 }, { "epoch": 2.56, "learning_rate": 1.092928188187221e-06, "loss": 0.0623, "step": 10918 }, { "epoch": 2.56, "learning_rate": 1.0917764111785767e-06, "loss": 0.0013, "step": 10919 }, { "epoch": 2.56, "learning_rate": 1.0906252063434009e-06, "loss": 0.0325, "step": 10920 }, { "epoch": 2.56, "learning_rate": 1.0894745737556367e-06, "loss": 0.0007, "step": 10921 }, { "epoch": 2.56, "learning_rate": 1.0883245134891873e-06, "loss": 0.0015, "step": 10922 }, { "epoch": 2.56, "learning_rate": 1.0871750256179248e-06, "loss": 0.0034, "step": 10923 }, { "epoch": 2.56, "learning_rate": 1.0860261102156778e-06, "loss": 0.0001, "step": 10924 }, { "epoch": 2.56, "learning_rate": 1.0848777673562406e-06, "loss": 0.0001, "step": 10925 }, { "epoch": 2.56, "learning_rate": 1.0837299971133709e-06, "loss": 0.0161, "step": 10926 }, { "epoch": 2.56, "learning_rate": 1.0825827995607906e-06, "loss": 0.0177, "step": 10927 }, { "epoch": 2.56, "learning_rate": 1.0814361747721869e-06, "loss": 0.0016, "step": 10928 }, { "epoch": 2.56, "learning_rate": 1.0802901228212014e-06, "loss": 0.0808, "step": 10929 }, { "epoch": 2.57, "learning_rate": 1.0791446437814502e-06, "loss": 0.0023, "step": 10930 }, { "epoch": 2.57, "learning_rate": 1.0779997377265017e-06, "loss": 0.0166, "step": 10931 }, { "epoch": 2.57, "learning_rate": 1.0768554047299006e-06, "loss": 0.0344, "step": 10932 }, { "epoch": 2.57, "learning_rate": 1.0757116448651383e-06, "loss": 0.0088, "step": 10933 }, { "epoch": 2.57, "learning_rate": 1.0745684582056848e-06, "loss": 0.0115, "step": 10934 }, { "epoch": 2.57, "learning_rate": 1.0734258448249634e-06, "loss": 0.0022, "step": 10935 }, { "epoch": 2.57, "learning_rate": 1.0722838047963658e-06, "loss": 0.0012, "step": 10936 }, { "epoch": 2.57, "learning_rate": 1.0711423381932451e-06, "loss": 0.0027, "step": 10937 }, { "epoch": 2.57, "learning_rate": 1.0700014450889151e-06, "loss": 0.0015, "step": 10938 }, { "epoch": 2.57, "learning_rate": 1.0688611255566584e-06, "loss": 0.0012, "step": 10939 }, { "epoch": 2.57, "learning_rate": 1.067721379669716e-06, "loss": 0.0412, "step": 10940 }, { "epoch": 2.57, "learning_rate": 1.0665822075012932e-06, "loss": 0.003, "step": 10941 }, { "epoch": 2.57, "learning_rate": 1.0654436091245567e-06, "loss": 0.0043, "step": 10942 }, { "epoch": 2.57, "learning_rate": 1.0643055846126437e-06, "loss": 0.0011, "step": 10943 }, { "epoch": 2.57, "learning_rate": 1.0631681340386457e-06, "loss": 0.0065, "step": 10944 }, { "epoch": 2.57, "learning_rate": 1.0620312574756197e-06, "loss": 0.0012, "step": 10945 }, { "epoch": 2.57, "learning_rate": 1.0608949549965908e-06, "loss": 0.0025, "step": 10946 }, { "epoch": 2.57, "learning_rate": 1.0597592266745405e-06, "loss": 0.0079, "step": 10947 }, { "epoch": 2.57, "learning_rate": 1.0586240725824205e-06, "loss": 0.0259, "step": 10948 }, { "epoch": 2.57, "learning_rate": 1.0574894927931356e-06, "loss": 0.007, "step": 10949 }, { "epoch": 2.57, "learning_rate": 1.0563554873795635e-06, "loss": 0.003, "step": 10950 }, { "epoch": 2.57, "learning_rate": 1.0552220564145388e-06, "loss": 0.0019, "step": 10951 }, { "epoch": 2.57, "learning_rate": 1.0540891999708659e-06, "loss": 0.0004, "step": 10952 }, { "epoch": 2.57, "learning_rate": 1.052956918121304e-06, "loss": 0.0143, "step": 10953 }, { "epoch": 2.57, "learning_rate": 1.0518252109385786e-06, "loss": 0.1032, "step": 10954 }, { "epoch": 2.57, "learning_rate": 1.0506940784953822e-06, "loss": 0.0004, "step": 10955 }, { "epoch": 2.57, "learning_rate": 1.0495635208643674e-06, "loss": 0.0015, "step": 10956 }, { "epoch": 2.57, "learning_rate": 1.0484335381181476e-06, "loss": 0.0104, "step": 10957 }, { "epoch": 2.57, "learning_rate": 1.0473041303292996e-06, "loss": 0.0009, "step": 10958 }, { "epoch": 2.57, "learning_rate": 1.0461752975703688e-06, "loss": 0.0043, "step": 10959 }, { "epoch": 2.57, "learning_rate": 1.04504703991386e-06, "loss": 0.001, "step": 10960 }, { "epoch": 2.57, "learning_rate": 1.043919357432236e-06, "loss": 0.0021, "step": 10961 }, { "epoch": 2.57, "learning_rate": 1.0427922501979336e-06, "loss": 0.0007, "step": 10962 }, { "epoch": 2.57, "learning_rate": 1.0416657182833444e-06, "loss": 0.0002, "step": 10963 }, { "epoch": 2.57, "learning_rate": 1.040539761760826e-06, "loss": 0.0205, "step": 10964 }, { "epoch": 2.57, "learning_rate": 1.0394143807026945e-06, "loss": 0.0136, "step": 10965 }, { "epoch": 2.57, "learning_rate": 1.0382895751812372e-06, "loss": 0.0011, "step": 10966 }, { "epoch": 2.57, "learning_rate": 1.037165345268698e-06, "loss": 0.0045, "step": 10967 }, { "epoch": 2.57, "learning_rate": 1.0360416910372907e-06, "loss": 0.0004, "step": 10968 }, { "epoch": 2.57, "learning_rate": 1.0349186125591781e-06, "loss": 0.0195, "step": 10969 }, { "epoch": 2.57, "learning_rate": 1.033796109906502e-06, "loss": 0.0004, "step": 10970 }, { "epoch": 2.57, "learning_rate": 1.032674183151361e-06, "loss": 0.0006, "step": 10971 }, { "epoch": 2.58, "learning_rate": 1.0315528323658143e-06, "loss": 0.0012, "step": 10972 }, { "epoch": 2.58, "learning_rate": 1.0304320576218863e-06, "loss": 0.0011, "step": 10973 }, { "epoch": 2.58, "learning_rate": 1.0293118589915618e-06, "loss": 0.0153, "step": 10974 }, { "epoch": 2.58, "learning_rate": 1.0281922365467955e-06, "loss": 0.0071, "step": 10975 }, { "epoch": 2.58, "learning_rate": 1.0270731903594978e-06, "loss": 0.019, "step": 10976 }, { "epoch": 2.58, "learning_rate": 1.0259547205015452e-06, "loss": 0.0016, "step": 10977 }, { "epoch": 2.58, "learning_rate": 1.024836827044775e-06, "loss": 0.03, "step": 10978 }, { "epoch": 2.58, "learning_rate": 1.023719510060993e-06, "loss": 0.0012, "step": 10979 }, { "epoch": 2.58, "learning_rate": 1.0226027696219631e-06, "loss": 0.0261, "step": 10980 }, { "epoch": 2.58, "learning_rate": 1.0214866057994099e-06, "loss": 0.0004, "step": 10981 }, { "epoch": 2.58, "learning_rate": 1.0203710186650296e-06, "loss": 0.0005, "step": 10982 }, { "epoch": 2.58, "learning_rate": 1.01925600829047e-06, "loss": 0.0119, "step": 10983 }, { "epoch": 2.58, "learning_rate": 1.0181415747473578e-06, "loss": 0.0025, "step": 10984 }, { "epoch": 2.58, "learning_rate": 1.0170277181072608e-06, "loss": 0.0011, "step": 10985 }, { "epoch": 2.58, "learning_rate": 1.0159144384417285e-06, "loss": 0.0268, "step": 10986 }, { "epoch": 2.58, "learning_rate": 1.0148017358222683e-06, "loss": 0.0079, "step": 10987 }, { "epoch": 2.58, "learning_rate": 1.0136896103203463e-06, "loss": 0.0033, "step": 10988 }, { "epoch": 2.58, "learning_rate": 1.0125780620073932e-06, "loss": 0.0175, "step": 10989 }, { "epoch": 2.58, "learning_rate": 1.011467090954803e-06, "loss": 0.0015, "step": 10990 }, { "epoch": 2.58, "learning_rate": 1.0103566972339374e-06, "loss": 0.0018, "step": 10991 }, { "epoch": 2.58, "learning_rate": 1.0092468809161127e-06, "loss": 0.0322, "step": 10992 }, { "epoch": 2.58, "learning_rate": 1.0081376420726141e-06, "loss": 0.007, "step": 10993 }, { "epoch": 2.58, "learning_rate": 1.007028980774686e-06, "loss": 0.0135, "step": 10994 }, { "epoch": 2.58, "learning_rate": 1.0059208970935396e-06, "loss": 0.0346, "step": 10995 }, { "epoch": 2.58, "learning_rate": 1.0048133911003466e-06, "loss": 0.0103, "step": 10996 }, { "epoch": 2.58, "learning_rate": 1.0037064628662385e-06, "loss": 0.065, "step": 10997 }, { "epoch": 2.58, "learning_rate": 1.0026001124623186e-06, "loss": 0.0065, "step": 10998 }, { "epoch": 2.58, "learning_rate": 1.0014943399596455e-06, "loss": 0.0015, "step": 10999 }, { "epoch": 2.58, "learning_rate": 1.0003891454292415e-06, "loss": 0.0127, "step": 11000 }, { "epoch": 2.58, "learning_rate": 9.992845289420915e-07, "loss": 0.0021, "step": 11001 }, { "epoch": 2.58, "learning_rate": 9.981804905691483e-07, "loss": 0.034, "step": 11002 }, { "epoch": 2.58, "learning_rate": 9.970770303813216e-07, "loss": 0.0003, "step": 11003 }, { "epoch": 2.58, "learning_rate": 9.9597414844949e-07, "loss": 0.0224, "step": 11004 }, { "epoch": 2.58, "learning_rate": 9.948718448444884e-07, "loss": 0.0003, "step": 11005 }, { "epoch": 2.58, "learning_rate": 9.937701196371151e-07, "loss": 0.0365, "step": 11006 }, { "epoch": 2.58, "learning_rate": 9.926689728981399e-07, "loss": 0.0102, "step": 11007 }, { "epoch": 2.58, "learning_rate": 9.915684046982854e-07, "loss": 0.0054, "step": 11008 }, { "epoch": 2.58, "learning_rate": 9.904684151082423e-07, "loss": 0.0024, "step": 11009 }, { "epoch": 2.58, "learning_rate": 9.893690041986593e-07, "loss": 0.0036, "step": 11010 }, { "epoch": 2.58, "learning_rate": 9.882701720401567e-07, "loss": 0.0088, "step": 11011 }, { "epoch": 2.58, "learning_rate": 9.87171918703309e-07, "loss": 0.0249, "step": 11012 }, { "epoch": 2.58, "learning_rate": 9.860742442586546e-07, "loss": 0.0081, "step": 11013 }, { "epoch": 2.58, "learning_rate": 9.849771487767023e-07, "loss": 0.0001, "step": 11014 }, { "epoch": 2.59, "learning_rate": 9.83880632327916e-07, "loss": 0.0521, "step": 11015 }, { "epoch": 2.59, "learning_rate": 9.827846949827248e-07, "loss": 0.0008, "step": 11016 }, { "epoch": 2.59, "learning_rate": 9.816893368115176e-07, "loss": 0.0193, "step": 11017 }, { "epoch": 2.59, "learning_rate": 9.80594557884653e-07, "loss": 0.0013, "step": 11018 }, { "epoch": 2.59, "learning_rate": 9.79500358272446e-07, "loss": 0.0032, "step": 11019 }, { "epoch": 2.59, "learning_rate": 9.78406738045181e-07, "loss": 0.0003, "step": 11020 }, { "epoch": 2.59, "learning_rate": 9.773136972730957e-07, "loss": 0.0079, "step": 11021 }, { "epoch": 2.59, "learning_rate": 9.762212360263967e-07, "loss": 0.0001, "step": 11022 }, { "epoch": 2.59, "learning_rate": 9.75129354375256e-07, "loss": 0.001, "step": 11023 }, { "epoch": 2.59, "learning_rate": 9.74038052389803e-07, "loss": 0.0007, "step": 11024 }, { "epoch": 2.59, "learning_rate": 9.729473301401316e-07, "loss": 0.0078, "step": 11025 }, { "epoch": 2.59, "learning_rate": 9.718571876962969e-07, "loss": 0.0139, "step": 11026 }, { "epoch": 2.59, "learning_rate": 9.707676251283226e-07, "loss": 0.0035, "step": 11027 }, { "epoch": 2.59, "learning_rate": 9.696786425061888e-07, "loss": 0.0414, "step": 11028 }, { "epoch": 2.59, "learning_rate": 9.685902398998393e-07, "loss": 0.0024, "step": 11029 }, { "epoch": 2.59, "learning_rate": 9.675024173791847e-07, "loss": 0.0007, "step": 11030 }, { "epoch": 2.59, "learning_rate": 9.66415175014095e-07, "loss": 0.0088, "step": 11031 }, { "epoch": 2.59, "learning_rate": 9.653285128744027e-07, "loss": 0.0006, "step": 11032 }, { "epoch": 2.59, "learning_rate": 9.642424310299025e-07, "loss": 0.0288, "step": 11033 }, { "epoch": 2.59, "learning_rate": 9.631569295503561e-07, "loss": 0.0008, "step": 11034 }, { "epoch": 2.59, "learning_rate": 9.620720085054847e-07, "loss": 0.0002, "step": 11035 }, { "epoch": 2.59, "learning_rate": 9.60987667964971e-07, "loss": 0.0261, "step": 11036 }, { "epoch": 2.59, "learning_rate": 9.599039079984617e-07, "loss": 0.0214, "step": 11037 }, { "epoch": 2.59, "learning_rate": 9.588207286755668e-07, "loss": 0.0004, "step": 11038 }, { "epoch": 2.59, "learning_rate": 9.577381300658606e-07, "loss": 0.0393, "step": 11039 }, { "epoch": 2.59, "learning_rate": 9.566561122388785e-07, "loss": 0.0018, "step": 11040 }, { "epoch": 2.59, "learning_rate": 9.55574675264116e-07, "loss": 0.0473, "step": 11041 }, { "epoch": 2.59, "learning_rate": 9.54493819211032e-07, "loss": 0.0002, "step": 11042 }, { "epoch": 2.59, "learning_rate": 9.534135441490545e-07, "loss": 0.0114, "step": 11043 }, { "epoch": 2.59, "learning_rate": 9.523338501475665e-07, "loss": 0.0536, "step": 11044 }, { "epoch": 2.59, "learning_rate": 9.512547372759173e-07, "loss": 0.0129, "step": 11045 }, { "epoch": 2.59, "learning_rate": 9.501762056034158e-07, "loss": 0.0005, "step": 11046 }, { "epoch": 2.59, "learning_rate": 9.4909825519934e-07, "loss": 0.0005, "step": 11047 }, { "epoch": 2.59, "learning_rate": 9.480208861329232e-07, "loss": 0.0001, "step": 11048 }, { "epoch": 2.59, "learning_rate": 9.469440984733647e-07, "loss": 0.0037, "step": 11049 }, { "epoch": 2.59, "learning_rate": 9.45867892289829e-07, "loss": 0.0208, "step": 11050 }, { "epoch": 2.59, "learning_rate": 9.447922676514387e-07, "loss": 0.0543, "step": 11051 }, { "epoch": 2.59, "learning_rate": 9.437172246272819e-07, "loss": 0.0308, "step": 11052 }, { "epoch": 2.59, "learning_rate": 9.426427632864054e-07, "loss": 0.0442, "step": 11053 }, { "epoch": 2.59, "learning_rate": 9.415688836978253e-07, "loss": 0.001, "step": 11054 }, { "epoch": 2.59, "learning_rate": 9.404955859305143e-07, "loss": 0.0052, "step": 11055 }, { "epoch": 2.59, "learning_rate": 9.394228700534125e-07, "loss": 0.0051, "step": 11056 }, { "epoch": 2.6, "learning_rate": 9.383507361354194e-07, "loss": 0.0499, "step": 11057 }, { "epoch": 2.6, "learning_rate": 9.372791842453943e-07, "loss": 0.0428, "step": 11058 }, { "epoch": 2.6, "learning_rate": 9.362082144521689e-07, "loss": 0.0071, "step": 11059 }, { "epoch": 2.6, "learning_rate": 9.351378268245282e-07, "loss": 0.0181, "step": 11060 }, { "epoch": 2.6, "learning_rate": 9.340680214312236e-07, "loss": 0.0824, "step": 11061 }, { "epoch": 2.6, "learning_rate": 9.329987983409649e-07, "loss": 0.0011, "step": 11062 }, { "epoch": 2.6, "learning_rate": 9.319301576224337e-07, "loss": 0.0025, "step": 11063 }, { "epoch": 2.6, "learning_rate": 9.308620993442663e-07, "loss": 0.0035, "step": 11064 }, { "epoch": 2.6, "learning_rate": 9.297946235750622e-07, "loss": 0.0121, "step": 11065 }, { "epoch": 2.6, "learning_rate": 9.287277303833886e-07, "loss": 0.0171, "step": 11066 }, { "epoch": 2.6, "learning_rate": 9.276614198377687e-07, "loss": 0.0003, "step": 11067 }, { "epoch": 2.6, "learning_rate": 9.265956920066932e-07, "loss": 0.0089, "step": 11068 }, { "epoch": 2.6, "learning_rate": 9.255305469586118e-07, "loss": 0.0004, "step": 11069 }, { "epoch": 2.6, "learning_rate": 9.244659847619408e-07, "loss": 0.0022, "step": 11070 }, { "epoch": 2.6, "learning_rate": 9.234020054850568e-07, "loss": 0.0002, "step": 11071 }, { "epoch": 2.6, "learning_rate": 9.223386091962949e-07, "loss": 0.0002, "step": 11072 }, { "epoch": 2.6, "learning_rate": 9.212757959639629e-07, "loss": 0.0029, "step": 11073 }, { "epoch": 2.6, "learning_rate": 9.202135658563194e-07, "loss": 0.0099, "step": 11074 }, { "epoch": 2.6, "learning_rate": 9.191519189415965e-07, "loss": 0.002, "step": 11075 }, { "epoch": 2.6, "learning_rate": 9.180908552879808e-07, "loss": 0.0152, "step": 11076 }, { "epoch": 2.6, "learning_rate": 9.170303749636244e-07, "loss": 0.0014, "step": 11077 }, { "epoch": 2.6, "learning_rate": 9.159704780366396e-07, "loss": 0.0007, "step": 11078 }, { "epoch": 2.6, "learning_rate": 9.149111645751074e-07, "loss": 0.0327, "step": 11079 }, { "epoch": 2.6, "learning_rate": 9.138524346470656e-07, "loss": 0.0141, "step": 11080 }, { "epoch": 2.6, "learning_rate": 9.127942883205143e-07, "loss": 0.0021, "step": 11081 }, { "epoch": 2.6, "learning_rate": 9.117367256634213e-07, "loss": 0.001, "step": 11082 }, { "epoch": 2.6, "learning_rate": 9.106797467437123e-07, "loss": 0.0001, "step": 11083 }, { "epoch": 2.6, "learning_rate": 9.096233516292774e-07, "loss": 0.0069, "step": 11084 }, { "epoch": 2.6, "learning_rate": 9.085675403879657e-07, "loss": 0.0022, "step": 11085 }, { "epoch": 2.6, "learning_rate": 9.075123130875951e-07, "loss": 0.0158, "step": 11086 }, { "epoch": 2.6, "learning_rate": 9.064576697959427e-07, "loss": 0.0293, "step": 11087 }, { "epoch": 2.6, "learning_rate": 9.054036105807462e-07, "loss": 0.0111, "step": 11088 }, { "epoch": 2.6, "learning_rate": 9.043501355097061e-07, "loss": 0.0012, "step": 11089 }, { "epoch": 2.6, "learning_rate": 9.032972446504895e-07, "loss": 0.0126, "step": 11090 }, { "epoch": 2.6, "learning_rate": 9.022449380707254e-07, "loss": 0.02, "step": 11091 }, { "epoch": 2.6, "learning_rate": 9.011932158379999e-07, "loss": 0.0298, "step": 11092 }, { "epoch": 2.6, "learning_rate": 9.001420780198655e-07, "loss": 0.0263, "step": 11093 }, { "epoch": 2.6, "learning_rate": 8.990915246838361e-07, "loss": 0.0062, "step": 11094 }, { "epoch": 2.6, "learning_rate": 8.98041555897391e-07, "loss": 0.0203, "step": 11095 }, { "epoch": 2.6, "learning_rate": 8.969921717279672e-07, "loss": 0.0217, "step": 11096 }, { "epoch": 2.6, "learning_rate": 8.959433722429678e-07, "loss": 0.004, "step": 11097 }, { "epoch": 2.6, "learning_rate": 8.948951575097542e-07, "loss": 0.0062, "step": 11098 }, { "epoch": 2.6, "learning_rate": 8.93847527595657e-07, "loss": 0.0051, "step": 11099 }, { "epoch": 2.61, "learning_rate": 8.928004825679637e-07, "loss": 0.0012, "step": 11100 }, { "epoch": 2.61, "learning_rate": 8.917540224939225e-07, "loss": 0.0008, "step": 11101 }, { "epoch": 2.61, "learning_rate": 8.907081474407531e-07, "loss": 0.0061, "step": 11102 }, { "epoch": 2.61, "learning_rate": 8.896628574756284e-07, "loss": 0.0238, "step": 11103 }, { "epoch": 2.61, "learning_rate": 8.886181526656879e-07, "loss": 0.0006, "step": 11104 }, { "epoch": 2.61, "learning_rate": 8.875740330780313e-07, "loss": 0.0011, "step": 11105 }, { "epoch": 2.61, "learning_rate": 8.865304987797252e-07, "loss": 0.0198, "step": 11106 }, { "epoch": 2.61, "learning_rate": 8.854875498377935e-07, "loss": 0.0056, "step": 11107 }, { "epoch": 2.61, "learning_rate": 8.844451863192238e-07, "loss": 0.0007, "step": 11108 }, { "epoch": 2.61, "learning_rate": 8.834034082909704e-07, "loss": 0.0098, "step": 11109 }, { "epoch": 2.61, "learning_rate": 8.823622158199418e-07, "loss": 0.0133, "step": 11110 }, { "epoch": 2.61, "learning_rate": 8.813216089730182e-07, "loss": 0.0021, "step": 11111 }, { "epoch": 2.61, "learning_rate": 8.802815878170357e-07, "loss": 0.0094, "step": 11112 }, { "epoch": 2.61, "learning_rate": 8.792421524187944e-07, "loss": 0.0085, "step": 11113 }, { "epoch": 2.61, "learning_rate": 8.782033028450543e-07, "loss": 0.0009, "step": 11114 }, { "epoch": 2.61, "learning_rate": 8.771650391625463e-07, "loss": 0.0024, "step": 11115 }, { "epoch": 2.61, "learning_rate": 8.761273614379539e-07, "loss": 0.0037, "step": 11116 }, { "epoch": 2.61, "learning_rate": 8.750902697379271e-07, "loss": 0.0182, "step": 11117 }, { "epoch": 2.61, "learning_rate": 8.740537641290803e-07, "loss": 0.0014, "step": 11118 }, { "epoch": 2.61, "learning_rate": 8.730178446779858e-07, "loss": 0.0009, "step": 11119 }, { "epoch": 2.61, "learning_rate": 8.719825114511826e-07, "loss": 0.0049, "step": 11120 }, { "epoch": 2.61, "learning_rate": 8.709477645151654e-07, "loss": 0.0036, "step": 11121 }, { "epoch": 2.61, "learning_rate": 8.69913603936402e-07, "loss": 0.0136, "step": 11122 }, { "epoch": 2.61, "learning_rate": 8.688800297813138e-07, "loss": 0.0001, "step": 11123 }, { "epoch": 2.61, "learning_rate": 8.678470421162843e-07, "loss": 0.0009, "step": 11124 }, { "epoch": 2.61, "learning_rate": 8.668146410076661e-07, "loss": 0.0011, "step": 11125 }, { "epoch": 2.61, "learning_rate": 8.657828265217671e-07, "loss": 0.0177, "step": 11126 }, { "epoch": 2.61, "learning_rate": 8.647515987248645e-07, "loss": 0.0007, "step": 11127 }, { "epoch": 2.61, "learning_rate": 8.637209576831884e-07, "loss": 0.0028, "step": 11128 }, { "epoch": 2.61, "learning_rate": 8.626909034629416e-07, "loss": 0.0008, "step": 11129 }, { "epoch": 2.61, "learning_rate": 8.616614361302811e-07, "loss": 0.0514, "step": 11130 }, { "epoch": 2.61, "learning_rate": 8.606325557513318e-07, "loss": 0.0122, "step": 11131 }, { "epoch": 2.61, "learning_rate": 8.596042623921764e-07, "loss": 0.0202, "step": 11132 }, { "epoch": 2.61, "learning_rate": 8.585765561188619e-07, "loss": 0.0034, "step": 11133 }, { "epoch": 2.61, "learning_rate": 8.575494369974003e-07, "loss": 0.0602, "step": 11134 }, { "epoch": 2.61, "learning_rate": 8.565229050937618e-07, "loss": 0.0284, "step": 11135 }, { "epoch": 2.61, "learning_rate": 8.554969604738795e-07, "loss": 0.0003, "step": 11136 }, { "epoch": 2.61, "learning_rate": 8.544716032036482e-07, "loss": 0.0026, "step": 11137 }, { "epoch": 2.61, "learning_rate": 8.53446833348931e-07, "loss": 0.0004, "step": 11138 }, { "epoch": 2.61, "learning_rate": 8.524226509755463e-07, "loss": 0.0267, "step": 11139 }, { "epoch": 2.61, "learning_rate": 8.513990561492758e-07, "loss": 0.0027, "step": 11140 }, { "epoch": 2.61, "learning_rate": 8.503760489358648e-07, "loss": 0.0137, "step": 11141 }, { "epoch": 2.61, "learning_rate": 8.493536294010218e-07, "loss": 0.0213, "step": 11142 }, { "epoch": 2.62, "learning_rate": 8.483317976104211e-07, "loss": 0.0005, "step": 11143 }, { "epoch": 2.62, "learning_rate": 8.473105536296866e-07, "loss": 0.0442, "step": 11144 }, { "epoch": 2.62, "learning_rate": 8.462898975244172e-07, "loss": 0.0059, "step": 11145 }, { "epoch": 2.62, "learning_rate": 8.45269829360168e-07, "loss": 0.0345, "step": 11146 }, { "epoch": 2.62, "learning_rate": 8.4425034920246e-07, "loss": 0.0426, "step": 11147 }, { "epoch": 2.62, "learning_rate": 8.432314571167721e-07, "loss": 0.0258, "step": 11148 }, { "epoch": 2.62, "learning_rate": 8.422131531685473e-07, "loss": 0.0099, "step": 11149 }, { "epoch": 2.62, "learning_rate": 8.411954374231934e-07, "loss": 0.0215, "step": 11150 }, { "epoch": 2.62, "learning_rate": 8.401783099460759e-07, "loss": 0.0001, "step": 11151 }, { "epoch": 2.62, "learning_rate": 8.391617708025257e-07, "loss": 0.0082, "step": 11152 }, { "epoch": 2.62, "learning_rate": 8.381458200578329e-07, "loss": 0.0054, "step": 11153 }, { "epoch": 2.62, "learning_rate": 8.371304577772555e-07, "loss": 0.0103, "step": 11154 }, { "epoch": 2.62, "learning_rate": 8.361156840260076e-07, "loss": 0.0035, "step": 11155 }, { "epoch": 2.62, "learning_rate": 8.351014988692696e-07, "loss": 0.0166, "step": 11156 }, { "epoch": 2.62, "learning_rate": 8.340879023721782e-07, "loss": 0.0018, "step": 11157 }, { "epoch": 2.62, "learning_rate": 8.330748945998391e-07, "loss": 0.0026, "step": 11158 }, { "epoch": 2.62, "learning_rate": 8.320624756173224e-07, "loss": 0.0013, "step": 11159 }, { "epoch": 2.62, "learning_rate": 8.310506454896472e-07, "loss": 0.0099, "step": 11160 }, { "epoch": 2.62, "learning_rate": 8.300394042818094e-07, "loss": 0.004, "step": 11161 }, { "epoch": 2.62, "learning_rate": 8.290287520587558e-07, "loss": 0.0017, "step": 11162 }, { "epoch": 2.62, "learning_rate": 8.280186888854069e-07, "loss": 0.0098, "step": 11163 }, { "epoch": 2.62, "learning_rate": 8.270092148266318e-07, "loss": 0.0009, "step": 11164 }, { "epoch": 2.62, "learning_rate": 8.26000329947274e-07, "loss": 0.0021, "step": 11165 }, { "epoch": 2.62, "learning_rate": 8.249920343121298e-07, "loss": 0.0052, "step": 11166 }, { "epoch": 2.62, "learning_rate": 8.23984327985966e-07, "loss": 0.005, "step": 11167 }, { "epoch": 2.62, "learning_rate": 8.229772110335066e-07, "loss": 0.0417, "step": 11168 }, { "epoch": 2.62, "learning_rate": 8.219706835194342e-07, "loss": 0.0028, "step": 11169 }, { "epoch": 2.62, "learning_rate": 8.209647455084035e-07, "loss": 0.0023, "step": 11170 }, { "epoch": 2.62, "learning_rate": 8.199593970650233e-07, "loss": 0.0053, "step": 11171 }, { "epoch": 2.62, "learning_rate": 8.189546382538671e-07, "loss": 0.023, "step": 11172 }, { "epoch": 2.62, "learning_rate": 8.179504691394679e-07, "loss": 0.0002, "step": 11173 }, { "epoch": 2.62, "learning_rate": 8.169468897863275e-07, "loss": 0.02, "step": 11174 }, { "epoch": 2.62, "learning_rate": 8.159439002589031e-07, "loss": 0.0221, "step": 11175 }, { "epoch": 2.62, "learning_rate": 8.149415006216166e-07, "loss": 0.0021, "step": 11176 }, { "epoch": 2.62, "learning_rate": 8.13939690938853e-07, "loss": 0.0089, "step": 11177 }, { "epoch": 2.62, "learning_rate": 8.129384712749566e-07, "loss": 0.0031, "step": 11178 }, { "epoch": 2.62, "learning_rate": 8.119378416942403e-07, "loss": 0.0009, "step": 11179 }, { "epoch": 2.62, "learning_rate": 8.109378022609671e-07, "loss": 0.0024, "step": 11180 }, { "epoch": 2.62, "learning_rate": 8.099383530393745e-07, "loss": 0.0031, "step": 11181 }, { "epoch": 2.62, "learning_rate": 8.089394940936545e-07, "loss": 0.0123, "step": 11182 }, { "epoch": 2.62, "learning_rate": 8.079412254879659e-07, "loss": 0.0007, "step": 11183 }, { "epoch": 2.62, "learning_rate": 8.069435472864251e-07, "loss": 0.0012, "step": 11184 }, { "epoch": 2.63, "learning_rate": 8.05946459553113e-07, "loss": 0.0026, "step": 11185 }, { "epoch": 2.63, "learning_rate": 8.04949962352074e-07, "loss": 0.0007, "step": 11186 }, { "epoch": 2.63, "learning_rate": 8.039540557473124e-07, "loss": 0.0032, "step": 11187 }, { "epoch": 2.63, "learning_rate": 8.02958739802795e-07, "loss": 0.026, "step": 11188 }, { "epoch": 2.63, "learning_rate": 8.01964014582447e-07, "loss": 0.0055, "step": 11189 }, { "epoch": 2.63, "learning_rate": 8.009698801501664e-07, "loss": 0.0018, "step": 11190 }, { "epoch": 2.63, "learning_rate": 7.99976336569801e-07, "loss": 0.0002, "step": 11191 }, { "epoch": 2.63, "learning_rate": 7.989833839051664e-07, "loss": 0.0017, "step": 11192 }, { "epoch": 2.63, "learning_rate": 7.979910222200426e-07, "loss": 0.0008, "step": 11193 }, { "epoch": 2.63, "learning_rate": 7.969992515781655e-07, "loss": 0.0002, "step": 11194 }, { "epoch": 2.63, "learning_rate": 7.960080720432417e-07, "loss": 0.008, "step": 11195 }, { "epoch": 2.63, "learning_rate": 7.950174836789259e-07, "loss": 0.0104, "step": 11196 }, { "epoch": 2.63, "learning_rate": 7.940274865488517e-07, "loss": 0.0004, "step": 11197 }, { "epoch": 2.63, "learning_rate": 7.930380807165993e-07, "loss": 0.0024, "step": 11198 }, { "epoch": 2.63, "learning_rate": 7.920492662457269e-07, "loss": 0.0019, "step": 11199 }, { "epoch": 2.63, "learning_rate": 7.910610431997356e-07, "loss": 0.0011, "step": 11200 }, { "epoch": 2.63, "learning_rate": 7.900734116421049e-07, "loss": 0.0029, "step": 11201 }, { "epoch": 2.63, "learning_rate": 7.890863716362695e-07, "loss": 0.0011, "step": 11202 }, { "epoch": 2.63, "learning_rate": 7.880999232456277e-07, "loss": 0.0272, "step": 11203 }, { "epoch": 2.63, "learning_rate": 7.871140665335364e-07, "loss": 0.0014, "step": 11204 }, { "epoch": 2.63, "learning_rate": 7.861288015633173e-07, "loss": 0.0559, "step": 11205 }, { "epoch": 2.63, "learning_rate": 7.851441283982564e-07, "loss": 0.0149, "step": 11206 }, { "epoch": 2.63, "learning_rate": 7.841600471015965e-07, "loss": 0.0509, "step": 11207 }, { "epoch": 2.63, "learning_rate": 7.831765577365458e-07, "loss": 0.0019, "step": 11208 }, { "epoch": 2.63, "learning_rate": 7.821936603662716e-07, "loss": 0.0004, "step": 11209 }, { "epoch": 2.63, "learning_rate": 7.81211355053908e-07, "loss": 0.0095, "step": 11210 }, { "epoch": 2.63, "learning_rate": 7.80229641862551e-07, "loss": 0.0116, "step": 11211 }, { "epoch": 2.63, "learning_rate": 7.79248520855248e-07, "loss": 0.0011, "step": 11212 }, { "epoch": 2.63, "learning_rate": 7.78267992095022e-07, "loss": 0.0013, "step": 11213 }, { "epoch": 2.63, "learning_rate": 7.772880556448503e-07, "loss": 0.0001, "step": 11214 }, { "epoch": 2.63, "learning_rate": 7.763087115676771e-07, "loss": 0.0018, "step": 11215 }, { "epoch": 2.63, "learning_rate": 7.753299599263986e-07, "loss": 0.016, "step": 11216 }, { "epoch": 2.63, "learning_rate": 7.743518007838869e-07, "loss": 0.0495, "step": 11217 }, { "epoch": 2.63, "learning_rate": 7.733742342029637e-07, "loss": 0.0007, "step": 11218 }, { "epoch": 2.63, "learning_rate": 7.723972602464214e-07, "loss": 0.0326, "step": 11219 }, { "epoch": 2.63, "learning_rate": 7.714208789770105e-07, "loss": 0.0011, "step": 11220 }, { "epoch": 2.63, "learning_rate": 7.704450904574413e-07, "loss": 0.0025, "step": 11221 }, { "epoch": 2.63, "learning_rate": 7.694698947503921e-07, "loss": 0.0407, "step": 11222 }, { "epoch": 2.63, "learning_rate": 7.684952919184973e-07, "loss": 0.0033, "step": 11223 }, { "epoch": 2.63, "learning_rate": 7.675212820243549e-07, "loss": 0.023, "step": 11224 }, { "epoch": 2.63, "learning_rate": 7.665478651305258e-07, "loss": 0.0047, "step": 11225 }, { "epoch": 2.63, "learning_rate": 7.655750412995344e-07, "loss": 0.0053, "step": 11226 }, { "epoch": 2.63, "learning_rate": 7.64602810593863e-07, "loss": 0.0046, "step": 11227 }, { "epoch": 2.64, "learning_rate": 7.636311730759571e-07, "loss": 0.0098, "step": 11228 }, { "epoch": 2.64, "learning_rate": 7.626601288082269e-07, "loss": 0.0177, "step": 11229 }, { "epoch": 2.64, "learning_rate": 7.616896778530403e-07, "loss": 0.004, "step": 11230 }, { "epoch": 2.64, "learning_rate": 7.607198202727328e-07, "loss": 0.0363, "step": 11231 }, { "epoch": 2.64, "learning_rate": 7.597505561295937e-07, "loss": 0.0555, "step": 11232 }, { "epoch": 2.64, "learning_rate": 7.587818854858808e-07, "loss": 0.0288, "step": 11233 }, { "epoch": 2.64, "learning_rate": 7.5781380840381e-07, "loss": 0.0286, "step": 11234 }, { "epoch": 2.64, "learning_rate": 7.568463249455649e-07, "loss": 0.0002, "step": 11235 }, { "epoch": 2.64, "learning_rate": 7.558794351732824e-07, "loss": 0.0027, "step": 11236 }, { "epoch": 2.64, "learning_rate": 7.54913139149066e-07, "loss": 0.0004, "step": 11237 }, { "epoch": 2.64, "learning_rate": 7.539474369349842e-07, "loss": 0.0028, "step": 11238 }, { "epoch": 2.64, "learning_rate": 7.529823285930615e-07, "loss": 0.0007, "step": 11239 }, { "epoch": 2.64, "learning_rate": 7.520178141852863e-07, "loss": 0.0104, "step": 11240 }, { "epoch": 2.64, "learning_rate": 7.510538937736078e-07, "loss": 0.0457, "step": 11241 }, { "epoch": 2.64, "learning_rate": 7.500905674199421e-07, "loss": 0.0066, "step": 11242 }, { "epoch": 2.64, "learning_rate": 7.49127835186162e-07, "loss": 0.0001, "step": 11243 }, { "epoch": 2.64, "learning_rate": 7.481656971341012e-07, "loss": 0.0016, "step": 11244 }, { "epoch": 2.64, "learning_rate": 7.472041533255614e-07, "loss": 0.0007, "step": 11245 }, { "epoch": 2.64, "learning_rate": 7.462432038222989e-07, "loss": 0.0061, "step": 11246 }, { "epoch": 2.64, "learning_rate": 7.452828486860397e-07, "loss": 0.0159, "step": 11247 }, { "epoch": 2.64, "learning_rate": 7.443230879784624e-07, "loss": 0.0016, "step": 11248 }, { "epoch": 2.64, "learning_rate": 7.433639217612143e-07, "loss": 0.0152, "step": 11249 }, { "epoch": 2.64, "learning_rate": 7.424053500959028e-07, "loss": 0.0004, "step": 11250 }, { "epoch": 2.64, "learning_rate": 7.414473730440986e-07, "loss": 0.1121, "step": 11251 }, { "epoch": 2.64, "learning_rate": 7.404899906673268e-07, "loss": 0.0095, "step": 11252 }, { "epoch": 2.64, "learning_rate": 7.395332030270841e-07, "loss": 0.0038, "step": 11253 }, { "epoch": 2.64, "learning_rate": 7.385770101848244e-07, "loss": 0.0009, "step": 11254 }, { "epoch": 2.64, "learning_rate": 7.376214122019643e-07, "loss": 0.005, "step": 11255 }, { "epoch": 2.64, "learning_rate": 7.366664091398812e-07, "loss": 0.0459, "step": 11256 }, { "epoch": 2.64, "learning_rate": 7.357120010599128e-07, "loss": 0.0277, "step": 11257 }, { "epoch": 2.64, "learning_rate": 7.347581880233623e-07, "loss": 0.0032, "step": 11258 }, { "epoch": 2.64, "learning_rate": 7.33804970091494e-07, "loss": 0.0005, "step": 11259 }, { "epoch": 2.64, "learning_rate": 7.328523473255311e-07, "loss": 0.0012, "step": 11260 }, { "epoch": 2.64, "learning_rate": 7.319003197866592e-07, "loss": 0.0423, "step": 11261 }, { "epoch": 2.64, "learning_rate": 7.309488875360293e-07, "loss": 0.0025, "step": 11262 }, { "epoch": 2.64, "learning_rate": 7.299980506347537e-07, "loss": 0.0365, "step": 11263 }, { "epoch": 2.64, "learning_rate": 7.290478091438979e-07, "loss": 0.0063, "step": 11264 }, { "epoch": 2.64, "learning_rate": 7.280981631245021e-07, "loss": 0.0962, "step": 11265 }, { "epoch": 2.64, "learning_rate": 7.271491126375574e-07, "loss": 0.0035, "step": 11266 }, { "epoch": 2.64, "learning_rate": 7.262006577440262e-07, "loss": 0.0002, "step": 11267 }, { "epoch": 2.64, "learning_rate": 7.252527985048208e-07, "loss": 0.0694, "step": 11268 }, { "epoch": 2.64, "learning_rate": 7.243055349808281e-07, "loss": 0.0071, "step": 11269 }, { "epoch": 2.64, "learning_rate": 7.233588672328862e-07, "loss": 0.0034, "step": 11270 }, { "epoch": 2.65, "learning_rate": 7.224127953218019e-07, "loss": 0.0005, "step": 11271 }, { "epoch": 2.65, "learning_rate": 7.214673193083422e-07, "loss": 0.0018, "step": 11272 }, { "epoch": 2.65, "learning_rate": 7.205224392532307e-07, "loss": 0.0003, "step": 11273 }, { "epoch": 2.65, "learning_rate": 7.195781552171621e-07, "loss": 0.0373, "step": 11274 }, { "epoch": 2.65, "learning_rate": 7.186344672607848e-07, "loss": 0.0496, "step": 11275 }, { "epoch": 2.65, "learning_rate": 7.176913754447112e-07, "loss": 0.0446, "step": 11276 }, { "epoch": 2.65, "learning_rate": 7.167488798295153e-07, "loss": 0.0016, "step": 11277 }, { "epoch": 2.65, "learning_rate": 7.158069804757362e-07, "loss": 0.0038, "step": 11278 }, { "epoch": 2.65, "learning_rate": 7.148656774438711e-07, "loss": 0.0008, "step": 11279 }, { "epoch": 2.65, "learning_rate": 7.139249707943763e-07, "loss": 0.0002, "step": 11280 }, { "epoch": 2.65, "learning_rate": 7.129848605876777e-07, "loss": 0.0209, "step": 11281 }, { "epoch": 2.65, "learning_rate": 7.120453468841559e-07, "loss": 0.0001, "step": 11282 }, { "epoch": 2.65, "learning_rate": 7.111064297441583e-07, "loss": 0.0386, "step": 11283 }, { "epoch": 2.65, "learning_rate": 7.101681092279877e-07, "loss": 0.0198, "step": 11284 }, { "epoch": 2.65, "learning_rate": 7.092303853959149e-07, "loss": 0.0136, "step": 11285 }, { "epoch": 2.65, "learning_rate": 7.08293258308167e-07, "loss": 0.0074, "step": 11286 }, { "epoch": 2.65, "learning_rate": 7.073567280249393e-07, "loss": 0.0035, "step": 11287 }, { "epoch": 2.65, "learning_rate": 7.064207946063828e-07, "loss": 0.0004, "step": 11288 }, { "epoch": 2.65, "learning_rate": 7.054854581126103e-07, "loss": 0.0186, "step": 11289 }, { "epoch": 2.65, "learning_rate": 7.045507186037026e-07, "loss": 0.0006, "step": 11290 }, { "epoch": 2.65, "learning_rate": 7.036165761396963e-07, "loss": 0.0011, "step": 11291 }, { "epoch": 2.65, "learning_rate": 7.026830307805899e-07, "loss": 0.0245, "step": 11292 }, { "epoch": 2.65, "learning_rate": 7.017500825863433e-07, "loss": 0.0007, "step": 11293 }, { "epoch": 2.65, "learning_rate": 7.00817731616884e-07, "loss": 0.0019, "step": 11294 }, { "epoch": 2.65, "learning_rate": 6.998859779320954e-07, "loss": 0.0002, "step": 11295 }, { "epoch": 2.65, "learning_rate": 6.989548215918207e-07, "loss": 0.0019, "step": 11296 }, { "epoch": 2.65, "learning_rate": 6.98024262655872e-07, "loss": 0.0034, "step": 11297 }, { "epoch": 2.65, "learning_rate": 6.970943011840148e-07, "loss": 0.0045, "step": 11298 }, { "epoch": 2.65, "learning_rate": 6.961649372359868e-07, "loss": 0.0021, "step": 11299 }, { "epoch": 2.65, "learning_rate": 6.952361708714728e-07, "loss": 0.0023, "step": 11300 }, { "epoch": 2.65, "learning_rate": 6.943080021501325e-07, "loss": 0.0021, "step": 11301 }, { "epoch": 2.65, "learning_rate": 6.933804311315806e-07, "loss": 0.0038, "step": 11302 }, { "epoch": 2.65, "learning_rate": 6.924534578753972e-07, "loss": 0.0023, "step": 11303 }, { "epoch": 2.65, "learning_rate": 6.915270824411158e-07, "loss": 0.0005, "step": 11304 }, { "epoch": 2.65, "learning_rate": 6.90601304888241e-07, "loss": 0.0128, "step": 11305 }, { "epoch": 2.65, "learning_rate": 6.896761252762363e-07, "loss": 0.0081, "step": 11306 }, { "epoch": 2.65, "learning_rate": 6.887515436645254e-07, "loss": 0.0601, "step": 11307 }, { "epoch": 2.65, "learning_rate": 6.878275601124929e-07, "loss": 0.0008, "step": 11308 }, { "epoch": 2.65, "learning_rate": 6.869041746794847e-07, "loss": 0.0297, "step": 11309 }, { "epoch": 2.65, "learning_rate": 6.859813874248134e-07, "loss": 0.0162, "step": 11310 }, { "epoch": 2.65, "learning_rate": 6.850591984077481e-07, "loss": 0.0053, "step": 11311 }, { "epoch": 2.65, "learning_rate": 6.841376076875183e-07, "loss": 0.0026, "step": 11312 }, { "epoch": 2.66, "learning_rate": 6.8321661532332e-07, "loss": 0.0323, "step": 11313 }, { "epoch": 2.66, "learning_rate": 6.822962213743078e-07, "loss": 0.0016, "step": 11314 }, { "epoch": 2.66, "learning_rate": 6.813764258995992e-07, "loss": 0.0005, "step": 11315 }, { "epoch": 2.66, "learning_rate": 6.8045722895827e-07, "loss": 0.0134, "step": 11316 }, { "epoch": 2.66, "learning_rate": 6.795386306093632e-07, "loss": 0.0449, "step": 11317 }, { "epoch": 2.66, "learning_rate": 6.786206309118771e-07, "loss": 0.0025, "step": 11318 }, { "epoch": 2.66, "learning_rate": 6.777032299247788e-07, "loss": 0.0113, "step": 11319 }, { "epoch": 2.66, "learning_rate": 6.76786427706988e-07, "loss": 0.0125, "step": 11320 }, { "epoch": 2.66, "learning_rate": 6.758702243173931e-07, "loss": 0.0009, "step": 11321 }, { "epoch": 2.66, "learning_rate": 6.749546198148426e-07, "loss": 0.001, "step": 11322 }, { "epoch": 2.66, "learning_rate": 6.740396142581451e-07, "loss": 0.0003, "step": 11323 }, { "epoch": 2.66, "learning_rate": 6.731252077060712e-07, "loss": 0.0359, "step": 11324 }, { "epoch": 2.66, "learning_rate": 6.722114002173497e-07, "loss": 0.0147, "step": 11325 }, { "epoch": 2.66, "learning_rate": 6.7129819185068e-07, "loss": 0.0028, "step": 11326 }, { "epoch": 2.66, "learning_rate": 6.703855826647132e-07, "loss": 0.0015, "step": 11327 }, { "epoch": 2.66, "learning_rate": 6.694735727180679e-07, "loss": 0.0023, "step": 11328 }, { "epoch": 2.66, "learning_rate": 6.685621620693205e-07, "loss": 0.0002, "step": 11329 }, { "epoch": 2.66, "learning_rate": 6.676513507770133e-07, "loss": 0.0835, "step": 11330 }, { "epoch": 2.66, "learning_rate": 6.667411388996458e-07, "loss": 0.001, "step": 11331 }, { "epoch": 2.66, "learning_rate": 6.658315264956794e-07, "loss": 0.0165, "step": 11332 }, { "epoch": 2.66, "learning_rate": 6.649225136235415e-07, "loss": 0.0004, "step": 11333 }, { "epoch": 2.66, "learning_rate": 6.640141003416146e-07, "loss": 0.0243, "step": 11334 }, { "epoch": 2.66, "learning_rate": 6.631062867082516e-07, "loss": 0.0012, "step": 11335 }, { "epoch": 2.66, "learning_rate": 6.62199072781754e-07, "loss": 0.0052, "step": 11336 }, { "epoch": 2.66, "learning_rate": 6.612924586203961e-07, "loss": 0.0144, "step": 11337 }, { "epoch": 2.66, "learning_rate": 6.60386444282407e-07, "loss": 0.0004, "step": 11338 }, { "epoch": 2.66, "learning_rate": 6.594810298259835e-07, "loss": 0.0288, "step": 11339 }, { "epoch": 2.66, "learning_rate": 6.585762153092778e-07, "loss": 0.0066, "step": 11340 }, { "epoch": 2.66, "learning_rate": 6.576720007904047e-07, "loss": 0.0427, "step": 11341 }, { "epoch": 2.66, "learning_rate": 6.567683863274455e-07, "loss": 0.0655, "step": 11342 }, { "epoch": 2.66, "learning_rate": 6.558653719784369e-07, "loss": 0.0016, "step": 11343 }, { "epoch": 2.66, "learning_rate": 6.549629578013794e-07, "loss": 0.0387, "step": 11344 }, { "epoch": 2.66, "learning_rate": 6.540611438542332e-07, "loss": 0.0125, "step": 11345 }, { "epoch": 2.66, "learning_rate": 6.531599301949243e-07, "loss": 0.0048, "step": 11346 }, { "epoch": 2.66, "learning_rate": 6.522593168813374e-07, "loss": 0.0013, "step": 11347 }, { "epoch": 2.66, "learning_rate": 6.513593039713162e-07, "loss": 0.0012, "step": 11348 }, { "epoch": 2.66, "learning_rate": 6.504598915226723e-07, "loss": 0.0004, "step": 11349 }, { "epoch": 2.66, "learning_rate": 6.495610795931717e-07, "loss": 0.0447, "step": 11350 }, { "epoch": 2.66, "learning_rate": 6.486628682405449e-07, "loss": 0.0178, "step": 11351 }, { "epoch": 2.66, "learning_rate": 6.477652575224847e-07, "loss": 0.0008, "step": 11352 }, { "epoch": 2.66, "learning_rate": 6.468682474966448e-07, "loss": 0.0179, "step": 11353 }, { "epoch": 2.66, "learning_rate": 6.459718382206392e-07, "loss": 0.0011, "step": 11354 }, { "epoch": 2.66, "learning_rate": 6.450760297520453e-07, "loss": 0.0195, "step": 11355 }, { "epoch": 2.67, "learning_rate": 6.441808221484003e-07, "loss": 0.0257, "step": 11356 }, { "epoch": 2.67, "learning_rate": 6.432862154672004e-07, "loss": 0.0626, "step": 11357 }, { "epoch": 2.67, "learning_rate": 6.423922097659108e-07, "loss": 0.0001, "step": 11358 }, { "epoch": 2.67, "learning_rate": 6.41498805101951e-07, "loss": 0.0273, "step": 11359 }, { "epoch": 2.67, "learning_rate": 6.406060015327032e-07, "loss": 0.0054, "step": 11360 }, { "epoch": 2.67, "learning_rate": 6.397137991155123e-07, "loss": 0.001, "step": 11361 }, { "epoch": 2.67, "learning_rate": 6.388221979076869e-07, "loss": 0.01, "step": 11362 }, { "epoch": 2.67, "learning_rate": 6.379311979664915e-07, "loss": 0.0383, "step": 11363 }, { "epoch": 2.67, "learning_rate": 6.370407993491545e-07, "loss": 0.0311, "step": 11364 }, { "epoch": 2.67, "learning_rate": 6.361510021128692e-07, "loss": 0.0028, "step": 11365 }, { "epoch": 2.67, "learning_rate": 6.352618063147841e-07, "loss": 0.0402, "step": 11366 }, { "epoch": 2.67, "learning_rate": 6.34373212012015e-07, "loss": 0.0002, "step": 11367 }, { "epoch": 2.67, "learning_rate": 6.334852192616314e-07, "loss": 0.0001, "step": 11368 }, { "epoch": 2.67, "learning_rate": 6.325978281206734e-07, "loss": 0.0016, "step": 11369 }, { "epoch": 2.67, "learning_rate": 6.317110386461367e-07, "loss": 0.0213, "step": 11370 }, { "epoch": 2.67, "learning_rate": 6.308248508949799e-07, "loss": 0.0159, "step": 11371 }, { "epoch": 2.67, "learning_rate": 6.299392649241198e-07, "loss": 0.0056, "step": 11372 }, { "epoch": 2.67, "learning_rate": 6.290542807904398e-07, "loss": 0.0228, "step": 11373 }, { "epoch": 2.67, "learning_rate": 6.281698985507833e-07, "loss": 0.0099, "step": 11374 }, { "epoch": 2.67, "learning_rate": 6.272861182619539e-07, "loss": 0.0011, "step": 11375 }, { "epoch": 2.67, "learning_rate": 6.264029399807148e-07, "loss": 0.0011, "step": 11376 }, { "epoch": 2.67, "learning_rate": 6.255203637637907e-07, "loss": 0.0102, "step": 11377 }, { "epoch": 2.67, "learning_rate": 6.246383896678754e-07, "loss": 0.0545, "step": 11378 }, { "epoch": 2.67, "learning_rate": 6.237570177496122e-07, "loss": 0.0003, "step": 11379 }, { "epoch": 2.67, "learning_rate": 6.228762480656148e-07, "loss": 0.0163, "step": 11380 }, { "epoch": 2.67, "learning_rate": 6.219960806724512e-07, "loss": 0.0099, "step": 11381 }, { "epoch": 2.67, "learning_rate": 6.211165156266596e-07, "loss": 0.0233, "step": 11382 }, { "epoch": 2.67, "learning_rate": 6.202375529847304e-07, "loss": 0.0038, "step": 11383 }, { "epoch": 2.67, "learning_rate": 6.193591928031184e-07, "loss": 0.0142, "step": 11384 }, { "epoch": 2.67, "learning_rate": 6.184814351382451e-07, "loss": 0.0227, "step": 11385 }, { "epoch": 2.67, "learning_rate": 6.176042800464855e-07, "loss": 0.0111, "step": 11386 }, { "epoch": 2.67, "learning_rate": 6.167277275841788e-07, "loss": 0.0119, "step": 11387 }, { "epoch": 2.67, "learning_rate": 6.158517778076256e-07, "loss": 0.0533, "step": 11388 }, { "epoch": 2.67, "learning_rate": 6.149764307730899e-07, "loss": 0.0048, "step": 11389 }, { "epoch": 2.67, "learning_rate": 6.141016865367932e-07, "loss": 0.0354, "step": 11390 }, { "epoch": 2.67, "learning_rate": 6.13227545154923e-07, "loss": 0.0301, "step": 11391 }, { "epoch": 2.67, "learning_rate": 6.123540066836231e-07, "loss": 0.0009, "step": 11392 }, { "epoch": 2.67, "learning_rate": 6.114810711789986e-07, "loss": 0.0011, "step": 11393 }, { "epoch": 2.67, "learning_rate": 6.106087386971227e-07, "loss": 0.0083, "step": 11394 }, { "epoch": 2.67, "learning_rate": 6.097370092940214e-07, "loss": 0.0001, "step": 11395 }, { "epoch": 2.67, "learning_rate": 6.088658830256888e-07, "loss": 0.0423, "step": 11396 }, { "epoch": 2.67, "learning_rate": 6.079953599480726e-07, "loss": 0.0017, "step": 11397 }, { "epoch": 2.68, "learning_rate": 6.071254401170912e-07, "loss": 0.0004, "step": 11398 }, { "epoch": 2.68, "learning_rate": 6.062561235886166e-07, "loss": 0.0148, "step": 11399 }, { "epoch": 2.68, "learning_rate": 6.053874104184853e-07, "loss": 0.0367, "step": 11400 }, { "epoch": 2.68, "learning_rate": 6.04519300662495e-07, "loss": 0.001, "step": 11401 }, { "epoch": 2.68, "learning_rate": 6.036517943764053e-07, "loss": 0.0555, "step": 11402 }, { "epoch": 2.68, "learning_rate": 6.02784891615934e-07, "loss": 0.0133, "step": 11403 }, { "epoch": 2.68, "learning_rate": 6.01918592436761e-07, "loss": 0.0002, "step": 11404 }, { "epoch": 2.68, "learning_rate": 6.010528968945318e-07, "loss": 0.0007, "step": 11405 }, { "epoch": 2.68, "learning_rate": 6.001878050448484e-07, "loss": 0.0268, "step": 11406 }, { "epoch": 2.68, "learning_rate": 5.993233169432744e-07, "loss": 0.0025, "step": 11407 }, { "epoch": 2.68, "learning_rate": 5.984594326453386e-07, "loss": 0.0006, "step": 11408 }, { "epoch": 2.68, "learning_rate": 5.975961522065243e-07, "loss": 0.0009, "step": 11409 }, { "epoch": 2.68, "learning_rate": 5.96733475682283e-07, "loss": 0.0051, "step": 11410 }, { "epoch": 2.68, "learning_rate": 5.958714031280244e-07, "loss": 0.0064, "step": 11411 }, { "epoch": 2.68, "learning_rate": 5.950099345991167e-07, "loss": 0.0159, "step": 11412 }, { "epoch": 2.68, "learning_rate": 5.941490701508912e-07, "loss": 0.0014, "step": 11413 }, { "epoch": 2.68, "learning_rate": 5.932888098386458e-07, "loss": 0.0076, "step": 11414 }, { "epoch": 2.68, "learning_rate": 5.924291537176308e-07, "loss": 0.0077, "step": 11415 }, { "epoch": 2.68, "learning_rate": 5.91570101843062e-07, "loss": 0.0428, "step": 11416 }, { "epoch": 2.68, "learning_rate": 5.907116542701185e-07, "loss": 0.0003, "step": 11417 }, { "epoch": 2.68, "learning_rate": 5.898538110539364e-07, "loss": 0.0061, "step": 11418 }, { "epoch": 2.68, "learning_rate": 5.88996572249616e-07, "loss": 0.0035, "step": 11419 }, { "epoch": 2.68, "learning_rate": 5.881399379122143e-07, "loss": 0.0548, "step": 11420 }, { "epoch": 2.68, "learning_rate": 5.872839080967574e-07, "loss": 0.0047, "step": 11421 }, { "epoch": 2.68, "learning_rate": 5.864284828582256e-07, "loss": 0.001, "step": 11422 }, { "epoch": 2.68, "learning_rate": 5.855736622515629e-07, "loss": 0.0002, "step": 11423 }, { "epoch": 2.68, "learning_rate": 5.84719446331673e-07, "loss": 0.0036, "step": 11424 }, { "epoch": 2.68, "learning_rate": 5.838658351534221e-07, "loss": 0.0075, "step": 11425 }, { "epoch": 2.68, "learning_rate": 5.830128287716408e-07, "loss": 0.0662, "step": 11426 }, { "epoch": 2.68, "learning_rate": 5.821604272411163e-07, "loss": 0.0066, "step": 11427 }, { "epoch": 2.68, "learning_rate": 5.813086306165961e-07, "loss": 0.0009, "step": 11428 }, { "epoch": 2.68, "learning_rate": 5.804574389527906e-07, "loss": 0.0235, "step": 11429 }, { "epoch": 2.68, "learning_rate": 5.796068523043752e-07, "loss": 0.0045, "step": 11430 }, { "epoch": 2.68, "learning_rate": 5.787568707259805e-07, "loss": 0.0213, "step": 11431 }, { "epoch": 2.68, "learning_rate": 5.779074942722007e-07, "loss": 0.0024, "step": 11432 }, { "epoch": 2.68, "learning_rate": 5.770587229975911e-07, "loss": 0.0457, "step": 11433 }, { "epoch": 2.68, "learning_rate": 5.762105569566689e-07, "loss": 0.0185, "step": 11434 }, { "epoch": 2.68, "learning_rate": 5.753629962039108e-07, "loss": 0.0298, "step": 11435 }, { "epoch": 2.68, "learning_rate": 5.745160407937545e-07, "loss": 0.0019, "step": 11436 }, { "epoch": 2.68, "learning_rate": 5.736696907806028e-07, "loss": 0.0004, "step": 11437 }, { "epoch": 2.68, "learning_rate": 5.728239462188157e-07, "loss": 0.0058, "step": 11438 }, { "epoch": 2.68, "learning_rate": 5.71978807162713e-07, "loss": 0.0065, "step": 11439 }, { "epoch": 2.68, "learning_rate": 5.711342736665793e-07, "loss": 0.0067, "step": 11440 }, { "epoch": 2.69, "learning_rate": 5.702903457846575e-07, "loss": 0.0023, "step": 11441 }, { "epoch": 2.69, "learning_rate": 5.694470235711569e-07, "loss": 0.0001, "step": 11442 }, { "epoch": 2.69, "learning_rate": 5.686043070802394e-07, "loss": 0.0024, "step": 11443 }, { "epoch": 2.69, "learning_rate": 5.677621963660351e-07, "loss": 0.0002, "step": 11444 }, { "epoch": 2.69, "learning_rate": 5.669206914826308e-07, "loss": 0.048, "step": 11445 }, { "epoch": 2.69, "learning_rate": 5.660797924840778e-07, "loss": 0.0304, "step": 11446 }, { "epoch": 2.69, "learning_rate": 5.65239499424387e-07, "loss": 0.0799, "step": 11447 }, { "epoch": 2.69, "learning_rate": 5.643998123575289e-07, "loss": 0.0024, "step": 11448 }, { "epoch": 2.69, "learning_rate": 5.635607313374359e-07, "loss": 0.0312, "step": 11449 }, { "epoch": 2.69, "learning_rate": 5.627222564180058e-07, "loss": 0.0373, "step": 11450 }, { "epoch": 2.69, "learning_rate": 5.618843876530899e-07, "loss": 0.0159, "step": 11451 }, { "epoch": 2.69, "learning_rate": 5.610471250965033e-07, "loss": 0.0014, "step": 11452 }, { "epoch": 2.69, "learning_rate": 5.602104688020282e-07, "loss": 0.0033, "step": 11453 }, { "epoch": 2.69, "learning_rate": 5.593744188233996e-07, "loss": 0.0059, "step": 11454 }, { "epoch": 2.69, "learning_rate": 5.585389752143177e-07, "loss": 0.0229, "step": 11455 }, { "epoch": 2.69, "learning_rate": 5.577041380284409e-07, "loss": 0.0014, "step": 11456 }, { "epoch": 2.69, "learning_rate": 5.568699073193928e-07, "loss": 0.0073, "step": 11457 }, { "epoch": 2.69, "learning_rate": 5.560362831407562e-07, "loss": 0.0238, "step": 11458 }, { "epoch": 2.69, "learning_rate": 5.552032655460726e-07, "loss": 0.0011, "step": 11459 }, { "epoch": 2.69, "learning_rate": 5.543708545888482e-07, "loss": 0.0015, "step": 11460 }, { "epoch": 2.69, "learning_rate": 5.53539050322548e-07, "loss": 0.0017, "step": 11461 }, { "epoch": 2.69, "learning_rate": 5.527078528005992e-07, "loss": 0.0424, "step": 11462 }, { "epoch": 2.69, "learning_rate": 5.51877262076389e-07, "loss": 0.0025, "step": 11463 }, { "epoch": 2.69, "learning_rate": 5.510472782032661e-07, "loss": 0.0008, "step": 11464 }, { "epoch": 2.69, "learning_rate": 5.502179012345387e-07, "loss": 0.0018, "step": 11465 }, { "epoch": 2.69, "learning_rate": 5.493891312234801e-07, "loss": 0.0061, "step": 11466 }, { "epoch": 2.69, "learning_rate": 5.485609682233217e-07, "loss": 0.0106, "step": 11467 }, { "epoch": 2.69, "learning_rate": 5.477334122872535e-07, "loss": 0.0022, "step": 11468 }, { "epoch": 2.69, "learning_rate": 5.469064634684329e-07, "loss": 0.0001, "step": 11469 }, { "epoch": 2.69, "learning_rate": 5.460801218199719e-07, "loss": 0.0029, "step": 11470 }, { "epoch": 2.69, "learning_rate": 5.452543873949489e-07, "loss": 0.0004, "step": 11471 }, { "epoch": 2.69, "learning_rate": 5.444292602463964e-07, "loss": 0.0121, "step": 11472 }, { "epoch": 2.69, "learning_rate": 5.43604740427317e-07, "loss": 0.0068, "step": 11473 }, { "epoch": 2.69, "learning_rate": 5.427808279906676e-07, "loss": 0.0017, "step": 11474 }, { "epoch": 2.69, "learning_rate": 5.41957522989367e-07, "loss": 0.0002, "step": 11475 }, { "epoch": 2.69, "learning_rate": 5.411348254762949e-07, "loss": 0.0367, "step": 11476 }, { "epoch": 2.69, "learning_rate": 5.403127355042959e-07, "loss": 0.0034, "step": 11477 }, { "epoch": 2.69, "learning_rate": 5.394912531261731e-07, "loss": 0.0531, "step": 11478 }, { "epoch": 2.69, "learning_rate": 5.386703783946867e-07, "loss": 0.0008, "step": 11479 }, { "epoch": 2.69, "learning_rate": 5.378501113625634e-07, "loss": 0.001, "step": 11480 }, { "epoch": 2.69, "learning_rate": 5.370304520824876e-07, "loss": 0.0006, "step": 11481 }, { "epoch": 2.69, "learning_rate": 5.362114006071084e-07, "loss": 0.0073, "step": 11482 }, { "epoch": 2.69, "learning_rate": 5.353929569890315e-07, "loss": 0.0004, "step": 11483 }, { "epoch": 2.7, "learning_rate": 5.345751212808237e-07, "loss": 0.0124, "step": 11484 }, { "epoch": 2.7, "learning_rate": 5.337578935350185e-07, "loss": 0.0254, "step": 11485 }, { "epoch": 2.7, "learning_rate": 5.32941273804104e-07, "loss": 0.0374, "step": 11486 }, { "epoch": 2.7, "learning_rate": 5.321252621405315e-07, "loss": 0.0002, "step": 11487 }, { "epoch": 2.7, "learning_rate": 5.313098585967114e-07, "loss": 0.0022, "step": 11488 }, { "epoch": 2.7, "learning_rate": 5.304950632250206e-07, "loss": 0.01, "step": 11489 }, { "epoch": 2.7, "learning_rate": 5.296808760777916e-07, "loss": 0.0026, "step": 11490 }, { "epoch": 2.7, "learning_rate": 5.288672972073195e-07, "loss": 0.0018, "step": 11491 }, { "epoch": 2.7, "learning_rate": 5.280543266658588e-07, "loss": 0.0001, "step": 11492 }, { "epoch": 2.7, "learning_rate": 5.272419645056271e-07, "loss": 0.0022, "step": 11493 }, { "epoch": 2.7, "learning_rate": 5.264302107788066e-07, "loss": 0.0111, "step": 11494 }, { "epoch": 2.7, "learning_rate": 5.256190655375304e-07, "loss": 0.0061, "step": 11495 }, { "epoch": 2.7, "learning_rate": 5.248085288339e-07, "loss": 0.0006, "step": 11496 }, { "epoch": 2.7, "learning_rate": 5.23998600719976e-07, "loss": 0.0017, "step": 11497 }, { "epoch": 2.7, "learning_rate": 5.231892812477813e-07, "loss": 0.0736, "step": 11498 }, { "epoch": 2.7, "learning_rate": 5.223805704692974e-07, "loss": 0.0195, "step": 11499 }, { "epoch": 2.7, "learning_rate": 5.215724684364676e-07, "loss": 0.0145, "step": 11500 }, { "epoch": 2.7, "learning_rate": 5.207649752011956e-07, "loss": 0.0405, "step": 11501 }, { "epoch": 2.7, "learning_rate": 5.199580908153467e-07, "loss": 0.0159, "step": 11502 }, { "epoch": 2.7, "learning_rate": 5.191518153307484e-07, "loss": 0.0323, "step": 11503 }, { "epoch": 2.7, "learning_rate": 5.183461487991848e-07, "loss": 0.0001, "step": 11504 }, { "epoch": 2.7, "learning_rate": 5.175410912724055e-07, "loss": 0.0015, "step": 11505 }, { "epoch": 2.7, "learning_rate": 5.167366428021203e-07, "loss": 0.0094, "step": 11506 }, { "epoch": 2.7, "learning_rate": 5.159328034399979e-07, "loss": 0.0059, "step": 11507 }, { "epoch": 2.7, "learning_rate": 5.15129573237666e-07, "loss": 0.002, "step": 11508 }, { "epoch": 2.7, "learning_rate": 5.143269522467198e-07, "loss": 0.008, "step": 11509 }, { "epoch": 2.7, "learning_rate": 5.135249405187104e-07, "loss": 0.0107, "step": 11510 }, { "epoch": 2.7, "learning_rate": 5.127235381051488e-07, "loss": 0.014, "step": 11511 }, { "epoch": 2.7, "learning_rate": 5.119227450575115e-07, "loss": 0.0142, "step": 11512 }, { "epoch": 2.7, "learning_rate": 5.111225614272308e-07, "loss": 0.0297, "step": 11513 }, { "epoch": 2.7, "learning_rate": 5.103229872657078e-07, "loss": 0.0245, "step": 11514 }, { "epoch": 2.7, "learning_rate": 5.095240226242915e-07, "loss": 0.0243, "step": 11515 }, { "epoch": 2.7, "learning_rate": 5.08725667554304e-07, "loss": 0.0016, "step": 11516 }, { "epoch": 2.7, "learning_rate": 5.07927922107021e-07, "loss": 0.0271, "step": 11517 }, { "epoch": 2.7, "learning_rate": 5.071307863336838e-07, "loss": 0.0918, "step": 11518 }, { "epoch": 2.7, "learning_rate": 5.063342602854926e-07, "loss": 0.0003, "step": 11519 }, { "epoch": 2.7, "learning_rate": 5.055383440136041e-07, "loss": 0.0001, "step": 11520 }, { "epoch": 2.7, "learning_rate": 5.047430375691443e-07, "loss": 0.0031, "step": 11521 }, { "epoch": 2.7, "learning_rate": 5.039483410031942e-07, "loss": 0.023, "step": 11522 }, { "epoch": 2.7, "learning_rate": 5.031542543667967e-07, "loss": 0.0139, "step": 11523 }, { "epoch": 2.7, "learning_rate": 5.02360777710954e-07, "loss": 0.0001, "step": 11524 }, { "epoch": 2.7, "learning_rate": 5.015679110866345e-07, "loss": 0.004, "step": 11525 }, { "epoch": 2.71, "learning_rate": 5.007756545447617e-07, "loss": 0.0002, "step": 11526 }, { "epoch": 2.71, "learning_rate": 4.999840081362206e-07, "loss": 0.0415, "step": 11527 }, { "epoch": 2.71, "learning_rate": 4.991929719118627e-07, "loss": 0.0015, "step": 11528 }, { "epoch": 2.71, "learning_rate": 4.984025459224917e-07, "loss": 0.0156, "step": 11529 }, { "epoch": 2.71, "learning_rate": 4.976127302188816e-07, "loss": 0.0011, "step": 11530 }, { "epoch": 2.71, "learning_rate": 4.968235248517562e-07, "loss": 0.0074, "step": 11531 }, { "epoch": 2.71, "learning_rate": 4.960349298718103e-07, "loss": 0.0053, "step": 11532 }, { "epoch": 2.71, "learning_rate": 4.952469453296926e-07, "loss": 0.0002, "step": 11533 }, { "epoch": 2.71, "learning_rate": 4.944595712760192e-07, "loss": 0.0077, "step": 11534 }, { "epoch": 2.71, "learning_rate": 4.936728077613573e-07, "loss": 0.0001, "step": 11535 }, { "epoch": 2.71, "learning_rate": 4.928866548362432e-07, "loss": 0.0052, "step": 11536 }, { "epoch": 2.71, "learning_rate": 4.921011125511733e-07, "loss": 0.0236, "step": 11537 }, { "epoch": 2.71, "learning_rate": 4.913161809566014e-07, "loss": 0.0054, "step": 11538 }, { "epoch": 2.71, "learning_rate": 4.905318601029429e-07, "loss": 0.0078, "step": 11539 }, { "epoch": 2.71, "learning_rate": 4.897481500405732e-07, "loss": 0.0002, "step": 11540 }, { "epoch": 2.71, "learning_rate": 4.889650508198329e-07, "loss": 0.0005, "step": 11541 }, { "epoch": 2.71, "learning_rate": 4.881825624910197e-07, "loss": 0.0245, "step": 11542 }, { "epoch": 2.71, "learning_rate": 4.874006851043911e-07, "loss": 0.0023, "step": 11543 }, { "epoch": 2.71, "learning_rate": 4.86619418710167e-07, "loss": 0.0372, "step": 11544 }, { "epoch": 2.71, "learning_rate": 4.858387633585282e-07, "loss": 0.0295, "step": 11545 }, { "epoch": 2.71, "learning_rate": 4.850587190996203e-07, "loss": 0.0006, "step": 11546 }, { "epoch": 2.71, "learning_rate": 4.842792859835377e-07, "loss": 0.0029, "step": 11547 }, { "epoch": 2.71, "learning_rate": 4.835004640603491e-07, "loss": 0.0014, "step": 11548 }, { "epoch": 2.71, "learning_rate": 4.827222533800746e-07, "loss": 0.0015, "step": 11549 }, { "epoch": 2.71, "learning_rate": 4.819446539927031e-07, "loss": 0.0008, "step": 11550 }, { "epoch": 2.71, "learning_rate": 4.811676659481746e-07, "loss": 0.0016, "step": 11551 }, { "epoch": 2.71, "learning_rate": 4.803912892963969e-07, "loss": 0.0119, "step": 11552 }, { "epoch": 2.71, "learning_rate": 4.796155240872369e-07, "loss": 0.0421, "step": 11553 }, { "epoch": 2.71, "learning_rate": 4.788403703705213e-07, "loss": 0.0028, "step": 11554 }, { "epoch": 2.71, "learning_rate": 4.780658281960393e-07, "loss": 0.0019, "step": 11555 }, { "epoch": 2.71, "learning_rate": 4.772918976135366e-07, "loss": 0.0059, "step": 11556 }, { "epoch": 2.71, "learning_rate": 4.7651857867272665e-07, "loss": 0.0033, "step": 11557 }, { "epoch": 2.71, "learning_rate": 4.757458714232777e-07, "loss": 0.0261, "step": 11558 }, { "epoch": 2.71, "learning_rate": 4.7497377591481984e-07, "loss": 0.007, "step": 11559 }, { "epoch": 2.71, "learning_rate": 4.742022921969436e-07, "loss": 0.0081, "step": 11560 }, { "epoch": 2.71, "learning_rate": 4.734314203192047e-07, "loss": 0.0072, "step": 11561 }, { "epoch": 2.71, "learning_rate": 4.726611603311138e-07, "loss": 0.059, "step": 11562 }, { "epoch": 2.71, "learning_rate": 4.7189151228214435e-07, "loss": 0.0206, "step": 11563 }, { "epoch": 2.71, "learning_rate": 4.7112247622173145e-07, "loss": 0.0056, "step": 11564 }, { "epoch": 2.71, "learning_rate": 4.7035405219927e-07, "loss": 0.0394, "step": 11565 }, { "epoch": 2.71, "learning_rate": 4.6958624026411713e-07, "loss": 0.0005, "step": 11566 }, { "epoch": 2.71, "learning_rate": 4.688190404655857e-07, "loss": 0.0006, "step": 11567 }, { "epoch": 2.71, "learning_rate": 4.680524528529573e-07, "loss": 0.0305, "step": 11568 }, { "epoch": 2.72, "learning_rate": 4.672864774754649e-07, "loss": 0.0472, "step": 11569 }, { "epoch": 2.72, "learning_rate": 4.6652111438231117e-07, "loss": 0.0002, "step": 11570 }, { "epoch": 2.72, "learning_rate": 4.6575636362265254e-07, "loss": 0.0108, "step": 11571 }, { "epoch": 2.72, "learning_rate": 4.649922252456096e-07, "loss": 0.0034, "step": 11572 }, { "epoch": 2.72, "learning_rate": 4.6422869930026314e-07, "loss": 0.0004, "step": 11573 }, { "epoch": 2.72, "learning_rate": 4.6346578583565503e-07, "loss": 0.0056, "step": 11574 }, { "epoch": 2.72, "learning_rate": 4.627034849007861e-07, "loss": 0.0006, "step": 11575 }, { "epoch": 2.72, "learning_rate": 4.619417965446171e-07, "loss": 0.0072, "step": 11576 }, { "epoch": 2.72, "learning_rate": 4.6118072081607455e-07, "loss": 0.0015, "step": 11577 }, { "epoch": 2.72, "learning_rate": 4.604202577640393e-07, "loss": 0.0008, "step": 11578 }, { "epoch": 2.72, "learning_rate": 4.5966040743735675e-07, "loss": 0.0012, "step": 11579 }, { "epoch": 2.72, "learning_rate": 4.5890116988483335e-07, "loss": 0.0306, "step": 11580 }, { "epoch": 2.72, "learning_rate": 4.5814254515523125e-07, "loss": 0.0052, "step": 11581 }, { "epoch": 2.72, "learning_rate": 4.573845332972826e-07, "loss": 0.0033, "step": 11582 }, { "epoch": 2.72, "learning_rate": 4.566271343596684e-07, "loss": 0.0007, "step": 11583 }, { "epoch": 2.72, "learning_rate": 4.558703483910398e-07, "loss": 0.0382, "step": 11584 }, { "epoch": 2.72, "learning_rate": 4.5511417544000234e-07, "loss": 0.0413, "step": 11585 }, { "epoch": 2.72, "learning_rate": 4.543586155551283e-07, "loss": 0.0198, "step": 11586 }, { "epoch": 2.72, "learning_rate": 4.536036687849432e-07, "loss": 0.0025, "step": 11587 }, { "epoch": 2.72, "learning_rate": 4.528493351779395e-07, "loss": 0.0194, "step": 11588 }, { "epoch": 2.72, "learning_rate": 4.5209561478256834e-07, "loss": 0.006, "step": 11589 }, { "epoch": 2.72, "learning_rate": 4.5134250764724107e-07, "loss": 0.0332, "step": 11590 }, { "epoch": 2.72, "learning_rate": 4.5059001382032786e-07, "loss": 0.0119, "step": 11591 }, { "epoch": 2.72, "learning_rate": 4.4983813335016116e-07, "loss": 0.0065, "step": 11592 }, { "epoch": 2.72, "learning_rate": 4.490868662850356e-07, "loss": 0.0181, "step": 11593 }, { "epoch": 2.72, "learning_rate": 4.4833621267320604e-07, "loss": 0.0001, "step": 11594 }, { "epoch": 2.72, "learning_rate": 4.475861725628838e-07, "loss": 0.0011, "step": 11595 }, { "epoch": 2.72, "learning_rate": 4.468367460022449e-07, "loss": 0.0023, "step": 11596 }, { "epoch": 2.72, "learning_rate": 4.460879330394252e-07, "loss": 0.0001, "step": 11597 }, { "epoch": 2.72, "learning_rate": 4.4533973372252294e-07, "loss": 0.0004, "step": 11598 }, { "epoch": 2.72, "learning_rate": 4.4459214809958963e-07, "loss": 0.0013, "step": 11599 }, { "epoch": 2.72, "learning_rate": 4.4384517621864797e-07, "loss": 0.0015, "step": 11600 }, { "epoch": 2.72, "learning_rate": 4.430988181276707e-07, "loss": 0.0113, "step": 11601 }, { "epoch": 2.72, "learning_rate": 4.4235307387460293e-07, "loss": 0.0003, "step": 11602 }, { "epoch": 2.72, "learning_rate": 4.4160794350733615e-07, "loss": 0.0177, "step": 11603 }, { "epoch": 2.72, "learning_rate": 4.408634270737344e-07, "loss": 0.0604, "step": 11604 }, { "epoch": 2.72, "learning_rate": 4.4011952462161834e-07, "loss": 0.0633, "step": 11605 }, { "epoch": 2.72, "learning_rate": 4.3937623619876635e-07, "loss": 0.0007, "step": 11606 }, { "epoch": 2.72, "learning_rate": 4.3863356185292137e-07, "loss": 0.0002, "step": 11607 }, { "epoch": 2.72, "learning_rate": 4.378915016317831e-07, "loss": 0.0074, "step": 11608 }, { "epoch": 2.72, "learning_rate": 4.371500555830166e-07, "loss": 0.0018, "step": 11609 }, { "epoch": 2.72, "learning_rate": 4.364092237542428e-07, "loss": 0.0274, "step": 11610 }, { "epoch": 2.73, "learning_rate": 4.356690061930469e-07, "loss": 0.0002, "step": 11611 }, { "epoch": 2.73, "learning_rate": 4.3492940294696975e-07, "loss": 0.0045, "step": 11612 }, { "epoch": 2.73, "learning_rate": 4.3419041406352e-07, "loss": 0.0304, "step": 11613 }, { "epoch": 2.73, "learning_rate": 4.3345203959016093e-07, "loss": 0.0341, "step": 11614 }, { "epoch": 2.73, "learning_rate": 4.327142795743167e-07, "loss": 0.0002, "step": 11615 }, { "epoch": 2.73, "learning_rate": 4.31977134063376e-07, "loss": 0.0003, "step": 11616 }, { "epoch": 2.73, "learning_rate": 4.3124060310468343e-07, "loss": 0.0615, "step": 11617 }, { "epoch": 2.73, "learning_rate": 4.3050468674554977e-07, "loss": 0.0034, "step": 11618 }, { "epoch": 2.73, "learning_rate": 4.2976938503323737e-07, "loss": 0.0191, "step": 11619 }, { "epoch": 2.73, "learning_rate": 4.2903469801497953e-07, "loss": 0.0012, "step": 11620 }, { "epoch": 2.73, "learning_rate": 4.283006257379607e-07, "loss": 0.0247, "step": 11621 }, { "epoch": 2.73, "learning_rate": 4.275671682493343e-07, "loss": 0.0215, "step": 11622 }, { "epoch": 2.73, "learning_rate": 4.268343255962082e-07, "loss": 0.0023, "step": 11623 }, { "epoch": 2.73, "learning_rate": 4.261020978256525e-07, "loss": 0.0006, "step": 11624 }, { "epoch": 2.73, "learning_rate": 4.2537048498469847e-07, "loss": 0.0196, "step": 11625 }, { "epoch": 2.73, "learning_rate": 4.246394871203374e-07, "loss": 0.0197, "step": 11626 }, { "epoch": 2.73, "learning_rate": 4.239091042795218e-07, "loss": 0.002, "step": 11627 }, { "epoch": 2.73, "learning_rate": 4.2317933650916186e-07, "loss": 0.0001, "step": 11628 }, { "epoch": 2.73, "learning_rate": 4.2245018385613347e-07, "loss": 0.0144, "step": 11629 }, { "epoch": 2.73, "learning_rate": 4.2172164636726796e-07, "loss": 0.0015, "step": 11630 }, { "epoch": 2.73, "learning_rate": 4.2099372408935803e-07, "loss": 0.0023, "step": 11631 }, { "epoch": 2.73, "learning_rate": 4.2026641706916174e-07, "loss": 0.0003, "step": 11632 }, { "epoch": 2.73, "learning_rate": 4.1953972535338947e-07, "loss": 0.0211, "step": 11633 }, { "epoch": 2.73, "learning_rate": 4.188136489887218e-07, "loss": 0.0011, "step": 11634 }, { "epoch": 2.73, "learning_rate": 4.1808818802178796e-07, "loss": 0.0017, "step": 11635 }, { "epoch": 2.73, "learning_rate": 4.1736334249918963e-07, "loss": 0.0007, "step": 11636 }, { "epoch": 2.73, "learning_rate": 4.1663911246748067e-07, "loss": 0.0039, "step": 11637 }, { "epoch": 2.73, "learning_rate": 4.159154979731816e-07, "loss": 0.0056, "step": 11638 }, { "epoch": 2.73, "learning_rate": 4.1519249906276425e-07, "loss": 0.0008, "step": 11639 }, { "epoch": 2.73, "learning_rate": 4.144701157826703e-07, "loss": 0.0007, "step": 11640 }, { "epoch": 2.73, "learning_rate": 4.137483481792981e-07, "loss": 0.0072, "step": 11641 }, { "epoch": 2.73, "learning_rate": 4.130271962990073e-07, "loss": 0.0027, "step": 11642 }, { "epoch": 2.73, "learning_rate": 4.123066601881165e-07, "loss": 0.0112, "step": 11643 }, { "epoch": 2.73, "learning_rate": 4.115867398929041e-07, "loss": 0.003, "step": 11644 }, { "epoch": 2.73, "learning_rate": 4.1086743545961425e-07, "loss": 0.0006, "step": 11645 }, { "epoch": 2.73, "learning_rate": 4.1014874693444453e-07, "loss": 0.0028, "step": 11646 }, { "epoch": 2.73, "learning_rate": 4.0943067436355564e-07, "loss": 0.035, "step": 11647 }, { "epoch": 2.73, "learning_rate": 4.0871321779307195e-07, "loss": 0.0033, "step": 11648 }, { "epoch": 2.73, "learning_rate": 4.0799637726907424e-07, "loss": 0.0393, "step": 11649 }, { "epoch": 2.73, "learning_rate": 4.072801528376047e-07, "loss": 0.0042, "step": 11650 }, { "epoch": 2.73, "learning_rate": 4.065645445446664e-07, "loss": 0.0141, "step": 11651 }, { "epoch": 2.73, "learning_rate": 4.0584955243622383e-07, "loss": 0.001, "step": 11652 }, { "epoch": 2.73, "learning_rate": 4.051351765581979e-07, "loss": 0.0006, "step": 11653 }, { "epoch": 2.74, "learning_rate": 4.044214169564775e-07, "loss": 0.0062, "step": 11654 }, { "epoch": 2.74, "learning_rate": 4.0370827367690266e-07, "loss": 0.0219, "step": 11655 }, { "epoch": 2.74, "learning_rate": 4.029957467652812e-07, "loss": 0.0025, "step": 11656 }, { "epoch": 2.74, "learning_rate": 4.0228383626737756e-07, "loss": 0.0246, "step": 11657 }, { "epoch": 2.74, "learning_rate": 4.015725422289185e-07, "loss": 0.0012, "step": 11658 }, { "epoch": 2.74, "learning_rate": 4.0086186469558975e-07, "loss": 0.0096, "step": 11659 }, { "epoch": 2.74, "learning_rate": 4.0015180371303585e-07, "loss": 0.0036, "step": 11660 }, { "epoch": 2.74, "learning_rate": 3.994423593268681e-07, "loss": 0.002, "step": 11661 }, { "epoch": 2.74, "learning_rate": 3.987335315826513e-07, "loss": 0.0359, "step": 11662 }, { "epoch": 2.74, "learning_rate": 3.9802532052591327e-07, "loss": 0.0334, "step": 11663 }, { "epoch": 2.74, "learning_rate": 3.9731772620214216e-07, "loss": 0.0169, "step": 11664 }, { "epoch": 2.74, "learning_rate": 3.966107486567894e-07, "loss": 0.0103, "step": 11665 }, { "epoch": 2.74, "learning_rate": 3.959043879352609e-07, "loss": 0.0221, "step": 11666 }, { "epoch": 2.74, "learning_rate": 3.9519864408292585e-07, "loss": 0.0027, "step": 11667 }, { "epoch": 2.74, "learning_rate": 3.9449351714511584e-07, "loss": 0.0025, "step": 11668 }, { "epoch": 2.74, "learning_rate": 3.937890071671202e-07, "loss": 0.0068, "step": 11669 }, { "epoch": 2.74, "learning_rate": 3.930851141941916e-07, "loss": 0.0164, "step": 11670 }, { "epoch": 2.74, "learning_rate": 3.923818382715372e-07, "loss": 0.0269, "step": 11671 }, { "epoch": 2.74, "learning_rate": 3.916791794443309e-07, "loss": 0.0105, "step": 11672 }, { "epoch": 2.74, "learning_rate": 3.9097713775770206e-07, "loss": 0.0013, "step": 11673 }, { "epoch": 2.74, "learning_rate": 3.9027571325674584e-07, "loss": 0.0081, "step": 11674 }, { "epoch": 2.74, "learning_rate": 3.895749059865128e-07, "loss": 0.0283, "step": 11675 }, { "epoch": 2.74, "learning_rate": 3.888747159920148e-07, "loss": 0.0004, "step": 11676 }, { "epoch": 2.74, "learning_rate": 3.881751433182268e-07, "loss": 0.0031, "step": 11677 }, { "epoch": 2.74, "learning_rate": 3.874761880100808e-07, "loss": 0.0205, "step": 11678 }, { "epoch": 2.74, "learning_rate": 3.8677785011247196e-07, "loss": 0.0548, "step": 11679 }, { "epoch": 2.74, "learning_rate": 3.860801296702521e-07, "loss": 0.0151, "step": 11680 }, { "epoch": 2.74, "learning_rate": 3.853830267282388e-07, "loss": 0.0104, "step": 11681 }, { "epoch": 2.74, "learning_rate": 3.84686541331204e-07, "loss": 0.0066, "step": 11682 }, { "epoch": 2.74, "learning_rate": 3.839906735238841e-07, "loss": 0.016, "step": 11683 }, { "epoch": 2.74, "learning_rate": 3.8329542335097447e-07, "loss": 0.0499, "step": 11684 }, { "epoch": 2.74, "learning_rate": 3.8260079085713165e-07, "loss": 0.0008, "step": 11685 }, { "epoch": 2.74, "learning_rate": 3.819067760869699e-07, "loss": 0.0027, "step": 11686 }, { "epoch": 2.74, "learning_rate": 3.8121337908506586e-07, "loss": 0.0012, "step": 11687 }, { "epoch": 2.74, "learning_rate": 3.8052059989595823e-07, "loss": 0.0082, "step": 11688 }, { "epoch": 2.74, "learning_rate": 3.7982843856414266e-07, "loss": 0.0002, "step": 11689 }, { "epoch": 2.74, "learning_rate": 3.791368951340768e-07, "loss": 0.0064, "step": 11690 }, { "epoch": 2.74, "learning_rate": 3.784459696501797e-07, "loss": 0.0055, "step": 11691 }, { "epoch": 2.74, "learning_rate": 3.7775566215682566e-07, "loss": 0.0019, "step": 11692 }, { "epoch": 2.74, "learning_rate": 3.770659726983572e-07, "loss": 0.0483, "step": 11693 }, { "epoch": 2.74, "learning_rate": 3.7637690131907103e-07, "loss": 0.0042, "step": 11694 }, { "epoch": 2.74, "learning_rate": 3.756884480632261e-07, "loss": 0.0041, "step": 11695 }, { "epoch": 2.74, "learning_rate": 3.7500061297504054e-07, "loss": 0.0051, "step": 11696 }, { "epoch": 2.75, "learning_rate": 3.7431339609869665e-07, "loss": 0.0063, "step": 11697 }, { "epoch": 2.75, "learning_rate": 3.7362679747833144e-07, "loss": 0.0008, "step": 11698 }, { "epoch": 2.75, "learning_rate": 3.729408171580462e-07, "loss": 0.0128, "step": 11699 }, { "epoch": 2.75, "learning_rate": 3.7225545518190245e-07, "loss": 0.0003, "step": 11700 }, { "epoch": 2.75, "learning_rate": 3.7157071159391936e-07, "loss": 0.0595, "step": 11701 }, { "epoch": 2.75, "learning_rate": 3.708865864380773e-07, "loss": 0.0501, "step": 11702 }, { "epoch": 2.75, "learning_rate": 3.702030797583178e-07, "loss": 0.0342, "step": 11703 }, { "epoch": 2.75, "learning_rate": 3.695201915985447e-07, "loss": 0.0011, "step": 11704 }, { "epoch": 2.75, "learning_rate": 3.6883792200261727e-07, "loss": 0.0011, "step": 11705 }, { "epoch": 2.75, "learning_rate": 3.6815627101435933e-07, "loss": 0.0028, "step": 11706 }, { "epoch": 2.75, "learning_rate": 3.6747523867755043e-07, "loss": 0.0006, "step": 11707 }, { "epoch": 2.75, "learning_rate": 3.667948250359354e-07, "loss": 0.0137, "step": 11708 }, { "epoch": 2.75, "learning_rate": 3.6611503013321724e-07, "loss": 0.0005, "step": 11709 }, { "epoch": 2.75, "learning_rate": 3.654358540130587e-07, "loss": 0.0038, "step": 11710 }, { "epoch": 2.75, "learning_rate": 3.6475729671908267e-07, "loss": 0.0036, "step": 11711 }, { "epoch": 2.75, "learning_rate": 3.64079358294871e-07, "loss": 0.0202, "step": 11712 }, { "epoch": 2.75, "learning_rate": 3.6340203878397094e-07, "loss": 0.0213, "step": 11713 }, { "epoch": 2.75, "learning_rate": 3.6272533822988564e-07, "loss": 0.005, "step": 11714 }, { "epoch": 2.75, "learning_rate": 3.6204925667607914e-07, "loss": 0.0038, "step": 11715 }, { "epoch": 2.75, "learning_rate": 3.613737941659734e-07, "loss": 0.0055, "step": 11716 }, { "epoch": 2.75, "learning_rate": 3.606989507429581e-07, "loss": 0.0027, "step": 11717 }, { "epoch": 2.75, "learning_rate": 3.600247264503753e-07, "loss": 0.0141, "step": 11718 }, { "epoch": 2.75, "learning_rate": 3.5935112133152927e-07, "loss": 0.0053, "step": 11719 }, { "epoch": 2.75, "learning_rate": 3.5867813542968863e-07, "loss": 0.0091, "step": 11720 }, { "epoch": 2.75, "learning_rate": 3.580057687880778e-07, "loss": 0.0177, "step": 11721 }, { "epoch": 2.75, "learning_rate": 3.573340214498822e-07, "loss": 0.0005, "step": 11722 }, { "epoch": 2.75, "learning_rate": 3.5666289345824724e-07, "loss": 0.0053, "step": 11723 }, { "epoch": 2.75, "learning_rate": 3.559923848562818e-07, "loss": 0.0012, "step": 11724 }, { "epoch": 2.75, "learning_rate": 3.553224956870505e-07, "loss": 0.0026, "step": 11725 }, { "epoch": 2.75, "learning_rate": 3.546532259935809e-07, "loss": 0.0001, "step": 11726 }, { "epoch": 2.75, "learning_rate": 3.539845758188609e-07, "loss": 0.0006, "step": 11727 }, { "epoch": 2.75, "learning_rate": 3.5331654520583515e-07, "loss": 0.0031, "step": 11728 }, { "epoch": 2.75, "learning_rate": 3.5264913419741475e-07, "loss": 0.0088, "step": 11729 }, { "epoch": 2.75, "learning_rate": 3.519823428364655e-07, "loss": 0.009, "step": 11730 }, { "epoch": 2.75, "learning_rate": 3.513161711658153e-07, "loss": 0.0186, "step": 11731 }, { "epoch": 2.75, "learning_rate": 3.5065061922824997e-07, "loss": 0.0067, "step": 11732 }, { "epoch": 2.75, "learning_rate": 3.4998568706652305e-07, "loss": 0.0308, "step": 11733 }, { "epoch": 2.75, "learning_rate": 3.4932137472333815e-07, "loss": 0.0018, "step": 11734 }, { "epoch": 2.75, "learning_rate": 3.4865768224136565e-07, "loss": 0.0011, "step": 11735 }, { "epoch": 2.75, "learning_rate": 3.479946096632358e-07, "loss": 0.0009, "step": 11736 }, { "epoch": 2.75, "learning_rate": 3.473321570315358e-07, "loss": 0.0001, "step": 11737 }, { "epoch": 2.75, "learning_rate": 3.466703243888159e-07, "loss": 0.0003, "step": 11738 }, { "epoch": 2.76, "learning_rate": 3.4600911177758325e-07, "loss": 0.0013, "step": 11739 }, { "epoch": 2.76, "learning_rate": 3.4534851924031164e-07, "loss": 0.0218, "step": 11740 }, { "epoch": 2.76, "learning_rate": 3.446885468194261e-07, "loss": 0.0109, "step": 11741 }, { "epoch": 2.76, "learning_rate": 3.4402919455731934e-07, "loss": 0.0002, "step": 11742 }, { "epoch": 2.76, "learning_rate": 3.433704624963419e-07, "loss": 0.0484, "step": 11743 }, { "epoch": 2.76, "learning_rate": 3.4271235067880106e-07, "loss": 0.0217, "step": 11744 }, { "epoch": 2.76, "learning_rate": 3.4205485914696966e-07, "loss": 0.0006, "step": 11745 }, { "epoch": 2.76, "learning_rate": 3.413979879430784e-07, "loss": 0.001, "step": 11746 }, { "epoch": 2.76, "learning_rate": 3.4074173710931804e-07, "loss": 0.0216, "step": 11747 }, { "epoch": 2.76, "learning_rate": 3.4008610668783695e-07, "loss": 0.0009, "step": 11748 }, { "epoch": 2.76, "learning_rate": 3.3943109672074814e-07, "loss": 0.0006, "step": 11749 }, { "epoch": 2.76, "learning_rate": 3.387767072501236e-07, "loss": 0.0262, "step": 11750 }, { "epoch": 2.76, "learning_rate": 3.381229383179918e-07, "loss": 0.0224, "step": 11751 }, { "epoch": 2.76, "learning_rate": 3.3746978996634706e-07, "loss": 0.0053, "step": 11752 }, { "epoch": 2.76, "learning_rate": 3.3681726223714015e-07, "loss": 0.0037, "step": 11753 }, { "epoch": 2.76, "learning_rate": 3.361653551722821e-07, "loss": 0.0038, "step": 11754 }, { "epoch": 2.76, "learning_rate": 3.355140688136449e-07, "loss": 0.0018, "step": 11755 }, { "epoch": 2.76, "learning_rate": 3.3486340320306067e-07, "loss": 0.0011, "step": 11756 }, { "epoch": 2.76, "learning_rate": 3.342133583823226e-07, "loss": 0.0072, "step": 11757 }, { "epoch": 2.76, "learning_rate": 3.335639343931818e-07, "loss": 0.0005, "step": 11758 }, { "epoch": 2.76, "learning_rate": 3.329151312773493e-07, "loss": 0.0075, "step": 11759 }, { "epoch": 2.76, "learning_rate": 3.322669490764996e-07, "loss": 0.0423, "step": 11760 }, { "epoch": 2.76, "learning_rate": 3.31619387832266e-07, "loss": 0.0059, "step": 11761 }, { "epoch": 2.76, "learning_rate": 3.309724475862397e-07, "loss": 0.0103, "step": 11762 }, { "epoch": 2.76, "learning_rate": 3.303261283799741e-07, "loss": 0.0012, "step": 11763 }, { "epoch": 2.76, "learning_rate": 3.2968043025498164e-07, "loss": 0.0006, "step": 11764 }, { "epoch": 2.76, "learning_rate": 3.290353532527357e-07, "loss": 0.0836, "step": 11765 }, { "epoch": 2.76, "learning_rate": 3.283908974146699e-07, "loss": 0.0047, "step": 11766 }, { "epoch": 2.76, "learning_rate": 3.2774706278217774e-07, "loss": 0.0109, "step": 11767 }, { "epoch": 2.76, "learning_rate": 3.271038493966106e-07, "loss": 0.0008, "step": 11768 }, { "epoch": 2.76, "learning_rate": 3.2646125729928425e-07, "loss": 0.0074, "step": 11769 }, { "epoch": 2.76, "learning_rate": 3.2581928653147134e-07, "loss": 0.0002, "step": 11770 }, { "epoch": 2.76, "learning_rate": 3.2517793713440437e-07, "loss": 0.0059, "step": 11771 }, { "epoch": 2.76, "learning_rate": 3.2453720914927935e-07, "loss": 0.0006, "step": 11772 }, { "epoch": 2.76, "learning_rate": 3.2389710261724885e-07, "loss": 0.0201, "step": 11773 }, { "epoch": 2.76, "learning_rate": 3.2325761757942663e-07, "loss": 0.0359, "step": 11774 }, { "epoch": 2.76, "learning_rate": 3.226187540768866e-07, "loss": 0.0086, "step": 11775 }, { "epoch": 2.76, "learning_rate": 3.219805121506625e-07, "loss": 0.0187, "step": 11776 }, { "epoch": 2.76, "learning_rate": 3.213428918417527e-07, "loss": 0.0011, "step": 11777 }, { "epoch": 2.76, "learning_rate": 3.2070589319110445e-07, "loss": 0.008, "step": 11778 }, { "epoch": 2.76, "learning_rate": 3.2006951623963724e-07, "loss": 0.0002, "step": 11779 }, { "epoch": 2.76, "learning_rate": 3.1943376102822275e-07, "loss": 0.0249, "step": 11780 }, { "epoch": 2.76, "learning_rate": 3.1879862759769733e-07, "loss": 0.0003, "step": 11781 }, { "epoch": 2.77, "learning_rate": 3.181641159888538e-07, "loss": 0.0006, "step": 11782 }, { "epoch": 2.77, "learning_rate": 3.1753022624244845e-07, "loss": 0.0089, "step": 11783 }, { "epoch": 2.77, "learning_rate": 3.168969583991932e-07, "loss": 0.0475, "step": 11784 }, { "epoch": 2.77, "learning_rate": 3.162643124997666e-07, "loss": 0.0005, "step": 11785 }, { "epoch": 2.77, "learning_rate": 3.1563228858480067e-07, "loss": 0.0004, "step": 11786 }, { "epoch": 2.77, "learning_rate": 3.150008866948895e-07, "loss": 0.0002, "step": 11787 }, { "epoch": 2.77, "learning_rate": 3.1437010687058956e-07, "loss": 0.0034, "step": 11788 }, { "epoch": 2.77, "learning_rate": 3.1373994915241625e-07, "loss": 0.0055, "step": 11789 }, { "epoch": 2.77, "learning_rate": 3.131104135808427e-07, "loss": 0.0051, "step": 11790 }, { "epoch": 2.77, "learning_rate": 3.1248150019630443e-07, "loss": 0.0001, "step": 11791 }, { "epoch": 2.77, "learning_rate": 3.118532090391968e-07, "loss": 0.0104, "step": 11792 }, { "epoch": 2.77, "learning_rate": 3.1122554014987537e-07, "loss": 0.0426, "step": 11793 }, { "epoch": 2.77, "learning_rate": 3.105984935686535e-07, "loss": 0.0503, "step": 11794 }, { "epoch": 2.77, "learning_rate": 3.099720693358088e-07, "loss": 0.0278, "step": 11795 }, { "epoch": 2.77, "learning_rate": 3.093462674915726e-07, "loss": 0.0002, "step": 11796 }, { "epoch": 2.77, "learning_rate": 3.0872108807614377e-07, "loss": 0.0043, "step": 11797 }, { "epoch": 2.77, "learning_rate": 3.0809653112967686e-07, "loss": 0.0247, "step": 11798 }, { "epoch": 2.77, "learning_rate": 3.0747259669228535e-07, "loss": 0.0001, "step": 11799 }, { "epoch": 2.77, "learning_rate": 3.068492848040439e-07, "loss": 0.0171, "step": 11800 }, { "epoch": 2.77, "learning_rate": 3.0622659550499034e-07, "loss": 0.0006, "step": 11801 }, { "epoch": 2.77, "learning_rate": 3.056045288351195e-07, "loss": 0.0004, "step": 11802 }, { "epoch": 2.77, "learning_rate": 3.0498308483438377e-07, "loss": 0.0972, "step": 11803 }, { "epoch": 2.77, "learning_rate": 3.0436226354270127e-07, "loss": 0.0042, "step": 11804 }, { "epoch": 2.77, "learning_rate": 3.0374206499994676e-07, "loss": 0.0085, "step": 11805 }, { "epoch": 2.77, "learning_rate": 3.03122489245955e-07, "loss": 0.0084, "step": 11806 }, { "epoch": 2.77, "learning_rate": 3.025035363205198e-07, "loss": 0.0017, "step": 11807 }, { "epoch": 2.77, "learning_rate": 3.0188520626339923e-07, "loss": 0.0111, "step": 11808 }, { "epoch": 2.77, "learning_rate": 3.0126749911430607e-07, "loss": 0.0134, "step": 11809 }, { "epoch": 2.77, "learning_rate": 3.0065041491291634e-07, "loss": 0.0031, "step": 11810 }, { "epoch": 2.77, "learning_rate": 3.0003395369886615e-07, "loss": 0.0037, "step": 11811 }, { "epoch": 2.77, "learning_rate": 2.9941811551174926e-07, "loss": 0.01, "step": 11812 }, { "epoch": 2.77, "learning_rate": 2.988029003911241e-07, "loss": 0.0007, "step": 11813 }, { "epoch": 2.77, "learning_rate": 2.9818830837650027e-07, "loss": 0.0006, "step": 11814 }, { "epoch": 2.77, "learning_rate": 2.9757433950735717e-07, "loss": 0.0107, "step": 11815 }, { "epoch": 2.77, "learning_rate": 2.969609938231277e-07, "loss": 0.0017, "step": 11816 }, { "epoch": 2.77, "learning_rate": 2.963482713632082e-07, "loss": 0.0091, "step": 11817 }, { "epoch": 2.77, "learning_rate": 2.957361721669538e-07, "loss": 0.0003, "step": 11818 }, { "epoch": 2.77, "learning_rate": 2.951246962736787e-07, "loss": 0.0003, "step": 11819 }, { "epoch": 2.77, "learning_rate": 2.945138437226591e-07, "loss": 0.0104, "step": 11820 }, { "epoch": 2.77, "learning_rate": 2.939036145531282e-07, "loss": 0.0001, "step": 11821 }, { "epoch": 2.77, "learning_rate": 2.932940088042824e-07, "loss": 0.0011, "step": 11822 }, { "epoch": 2.77, "learning_rate": 2.926850265152748e-07, "loss": 0.0014, "step": 11823 }, { "epoch": 2.78, "learning_rate": 2.920766677252229e-07, "loss": 0.0129, "step": 11824 }, { "epoch": 2.78, "learning_rate": 2.9146893247319897e-07, "loss": 0.0364, "step": 11825 }, { "epoch": 2.78, "learning_rate": 2.9086182079823944e-07, "loss": 0.0556, "step": 11826 }, { "epoch": 2.78, "learning_rate": 2.902553327393376e-07, "loss": 0.0059, "step": 11827 }, { "epoch": 2.78, "learning_rate": 2.8964946833544784e-07, "loss": 0.0065, "step": 11828 }, { "epoch": 2.78, "learning_rate": 2.890442276254879e-07, "loss": 0.0001, "step": 11829 }, { "epoch": 2.78, "learning_rate": 2.8843961064832783e-07, "loss": 0.001, "step": 11830 }, { "epoch": 2.78, "learning_rate": 2.878356174428043e-07, "loss": 0.0079, "step": 11831 }, { "epoch": 2.78, "learning_rate": 2.872322480477119e-07, "loss": 0.0391, "step": 11832 }, { "epoch": 2.78, "learning_rate": 2.866295025018051e-07, "loss": 0.0005, "step": 11833 }, { "epoch": 2.78, "learning_rate": 2.8602738084379744e-07, "loss": 0.0069, "step": 11834 }, { "epoch": 2.78, "learning_rate": 2.854258831123635e-07, "loss": 0.0002, "step": 11835 }, { "epoch": 2.78, "learning_rate": 2.8482500934613577e-07, "loss": 0.0006, "step": 11836 }, { "epoch": 2.78, "learning_rate": 2.842247595837111e-07, "loss": 0.0051, "step": 11837 }, { "epoch": 2.78, "learning_rate": 2.8362513386364086e-07, "loss": 0.005, "step": 11838 }, { "epoch": 2.78, "learning_rate": 2.8302613222443987e-07, "loss": 0.02, "step": 11839 }, { "epoch": 2.78, "learning_rate": 2.8242775470458284e-07, "loss": 0.0076, "step": 11840 }, { "epoch": 2.78, "learning_rate": 2.8183000134250236e-07, "loss": 0.0011, "step": 11841 }, { "epoch": 2.78, "learning_rate": 2.812328721765922e-07, "loss": 0.0438, "step": 11842 }, { "epoch": 2.78, "learning_rate": 2.8063636724520503e-07, "loss": 0.0002, "step": 11843 }, { "epoch": 2.78, "learning_rate": 2.8004048658665573e-07, "loss": 0.0013, "step": 11844 }, { "epoch": 2.78, "learning_rate": 2.794452302392181e-07, "loss": 0.0019, "step": 11845 }, { "epoch": 2.78, "learning_rate": 2.7885059824112157e-07, "loss": 0.0038, "step": 11846 }, { "epoch": 2.78, "learning_rate": 2.7825659063056343e-07, "loss": 0.0204, "step": 11847 }, { "epoch": 2.78, "learning_rate": 2.776632074456942e-07, "loss": 0.0015, "step": 11848 }, { "epoch": 2.78, "learning_rate": 2.770704487246301e-07, "loss": 0.0001, "step": 11849 }, { "epoch": 2.78, "learning_rate": 2.764783145054384e-07, "loss": 0.0007, "step": 11850 }, { "epoch": 2.78, "learning_rate": 2.7588680482615646e-07, "loss": 0.0007, "step": 11851 }, { "epoch": 2.78, "learning_rate": 2.7529591972477396e-07, "loss": 0.0024, "step": 11852 }, { "epoch": 2.78, "learning_rate": 2.747056592392461e-07, "loss": 0.0053, "step": 11853 }, { "epoch": 2.78, "learning_rate": 2.7411602340748356e-07, "loss": 0.0641, "step": 11854 }, { "epoch": 2.78, "learning_rate": 2.735270122673572e-07, "loss": 0.0035, "step": 11855 }, { "epoch": 2.78, "learning_rate": 2.729386258567024e-07, "loss": 0.0352, "step": 11856 }, { "epoch": 2.78, "learning_rate": 2.723508642133077e-07, "loss": 0.007, "step": 11857 }, { "epoch": 2.78, "learning_rate": 2.717637273749274e-07, "loss": 0.0144, "step": 11858 }, { "epoch": 2.78, "learning_rate": 2.711772153792713e-07, "loss": 0.0153, "step": 11859 }, { "epoch": 2.78, "learning_rate": 2.705913282640127e-07, "loss": 0.0003, "step": 11860 }, { "epoch": 2.78, "learning_rate": 2.700060660667825e-07, "loss": 0.0115, "step": 11861 }, { "epoch": 2.78, "learning_rate": 2.6942142882517063e-07, "loss": 0.0044, "step": 11862 }, { "epoch": 2.78, "learning_rate": 2.6883741657673045e-07, "loss": 0.0002, "step": 11863 }, { "epoch": 2.78, "learning_rate": 2.682540293589708e-07, "loss": 0.0001, "step": 11864 }, { "epoch": 2.78, "learning_rate": 2.676712672093651e-07, "loss": 0.001, "step": 11865 }, { "epoch": 2.78, "learning_rate": 2.6708913016534e-07, "loss": 0.0088, "step": 11866 }, { "epoch": 2.79, "learning_rate": 2.6650761826429007e-07, "loss": 0.0104, "step": 11867 }, { "epoch": 2.79, "learning_rate": 2.6592673154356317e-07, "loss": 0.0175, "step": 11868 }, { "epoch": 2.79, "learning_rate": 2.6534647004047175e-07, "loss": 0.0128, "step": 11869 }, { "epoch": 2.79, "learning_rate": 2.647668337922849e-07, "loss": 0.0068, "step": 11870 }, { "epoch": 2.79, "learning_rate": 2.6418782283623044e-07, "loss": 0.0001, "step": 11871 }, { "epoch": 2.79, "learning_rate": 2.6360943720950104e-07, "loss": 0.0005, "step": 11872 }, { "epoch": 2.79, "learning_rate": 2.6303167694924583e-07, "loss": 0.0225, "step": 11873 }, { "epoch": 2.79, "learning_rate": 2.624545420925739e-07, "loss": 0.0247, "step": 11874 }, { "epoch": 2.79, "learning_rate": 2.6187803267655245e-07, "loss": 0.003, "step": 11875 }, { "epoch": 2.79, "learning_rate": 2.6130214873821394e-07, "loss": 0.005, "step": 11876 }, { "epoch": 2.79, "learning_rate": 2.6072689031454545e-07, "loss": 0.0344, "step": 11877 }, { "epoch": 2.79, "learning_rate": 2.601522574424964e-07, "loss": 0.0308, "step": 11878 }, { "epoch": 2.79, "learning_rate": 2.5957825015897385e-07, "loss": 0.0296, "step": 11879 }, { "epoch": 2.79, "learning_rate": 2.5900486850084725e-07, "loss": 0.0436, "step": 11880 }, { "epoch": 2.79, "learning_rate": 2.5843211250494604e-07, "loss": 0.0002, "step": 11881 }, { "epoch": 2.79, "learning_rate": 2.5785998220805633e-07, "loss": 0.02, "step": 11882 }, { "epoch": 2.79, "learning_rate": 2.572884776469264e-07, "loss": 0.0135, "step": 11883 }, { "epoch": 2.79, "learning_rate": 2.5671759885826376e-07, "loss": 0.0017, "step": 11884 }, { "epoch": 2.79, "learning_rate": 2.561473458787378e-07, "loss": 0.0004, "step": 11885 }, { "epoch": 2.79, "learning_rate": 2.555777187449726e-07, "loss": 0.005, "step": 11886 }, { "epoch": 2.79, "learning_rate": 2.550087174935578e-07, "loss": 0.0019, "step": 11887 }, { "epoch": 2.79, "learning_rate": 2.544403421610375e-07, "loss": 0.0039, "step": 11888 }, { "epoch": 2.79, "learning_rate": 2.538725927839214e-07, "loss": 0.008, "step": 11889 }, { "epoch": 2.79, "learning_rate": 2.5330546939867476e-07, "loss": 0.0018, "step": 11890 }, { "epoch": 2.79, "learning_rate": 2.527389720417217e-07, "loss": 0.0009, "step": 11891 }, { "epoch": 2.79, "learning_rate": 2.5217310074945214e-07, "loss": 0.0043, "step": 11892 }, { "epoch": 2.79, "learning_rate": 2.5160785555820913e-07, "loss": 0.0039, "step": 11893 }, { "epoch": 2.79, "learning_rate": 2.5104323650430027e-07, "loss": 0.0004, "step": 11894 }, { "epoch": 2.79, "learning_rate": 2.504792436239878e-07, "loss": 0.0013, "step": 11895 }, { "epoch": 2.79, "learning_rate": 2.499158769534982e-07, "loss": 0.0001, "step": 11896 }, { "epoch": 2.79, "learning_rate": 2.493531365290192e-07, "loss": 0.0028, "step": 11897 }, { "epoch": 2.79, "learning_rate": 2.487910223866918e-07, "loss": 0.0003, "step": 11898 }, { "epoch": 2.79, "learning_rate": 2.482295345626218e-07, "loss": 0.0025, "step": 11899 }, { "epoch": 2.79, "learning_rate": 2.4766867309287345e-07, "loss": 0.0002, "step": 11900 }, { "epoch": 2.79, "learning_rate": 2.4710843801347364e-07, "loss": 0.0159, "step": 11901 }, { "epoch": 2.79, "learning_rate": 2.4654882936040017e-07, "loss": 0.0002, "step": 11902 }, { "epoch": 2.79, "learning_rate": 2.4598984716960095e-07, "loss": 0.0016, "step": 11903 }, { "epoch": 2.79, "learning_rate": 2.4543149147697733e-07, "loss": 0.0003, "step": 11904 }, { "epoch": 2.79, "learning_rate": 2.44873762318395e-07, "loss": 0.0219, "step": 11905 }, { "epoch": 2.79, "learning_rate": 2.443166597296742e-07, "loss": 0.0034, "step": 11906 }, { "epoch": 2.79, "learning_rate": 2.437601837465975e-07, "loss": 0.0423, "step": 11907 }, { "epoch": 2.79, "learning_rate": 2.4320433440490955e-07, "loss": 0.0027, "step": 11908 }, { "epoch": 2.79, "learning_rate": 2.426491117403107e-07, "loss": 0.0066, "step": 11909 }, { "epoch": 2.8, "learning_rate": 2.4209451578846246e-07, "loss": 0.0006, "step": 11910 }, { "epoch": 2.8, "learning_rate": 2.4154054658498737e-07, "loss": 0.0022, "step": 11911 }, { "epoch": 2.8, "learning_rate": 2.4098720416546594e-07, "loss": 0.0087, "step": 11912 }, { "epoch": 2.8, "learning_rate": 2.4043448856544083e-07, "loss": 0.0096, "step": 11913 }, { "epoch": 2.8, "learning_rate": 2.398823998204103e-07, "loss": 0.0303, "step": 11914 }, { "epoch": 2.8, "learning_rate": 2.393309379658382e-07, "loss": 0.0008, "step": 11915 }, { "epoch": 2.8, "learning_rate": 2.387801030371406e-07, "loss": 0.0023, "step": 11916 }, { "epoch": 2.8, "learning_rate": 2.3822989506970263e-07, "loss": 0.0309, "step": 11917 }, { "epoch": 2.8, "learning_rate": 2.376803140988604e-07, "loss": 0.0026, "step": 11918 }, { "epoch": 2.8, "learning_rate": 2.3713136015991346e-07, "loss": 0.0114, "step": 11919 }, { "epoch": 2.8, "learning_rate": 2.365830332881225e-07, "loss": 0.0001, "step": 11920 }, { "epoch": 2.8, "learning_rate": 2.3603533351870712e-07, "loss": 0.0016, "step": 11921 }, { "epoch": 2.8, "learning_rate": 2.354882608868425e-07, "loss": 0.0017, "step": 11922 }, { "epoch": 2.8, "learning_rate": 2.3494181542766948e-07, "loss": 0.0276, "step": 11923 }, { "epoch": 2.8, "learning_rate": 2.3439599717628658e-07, "loss": 0.003, "step": 11924 }, { "epoch": 2.8, "learning_rate": 2.3385080616775025e-07, "loss": 0.0329, "step": 11925 }, { "epoch": 2.8, "learning_rate": 2.3330624243708021e-07, "loss": 0.0531, "step": 11926 }, { "epoch": 2.8, "learning_rate": 2.3276230601924965e-07, "loss": 0.0286, "step": 11927 }, { "epoch": 2.8, "learning_rate": 2.3221899694919948e-07, "loss": 0.001, "step": 11928 }, { "epoch": 2.8, "learning_rate": 2.316763152618251e-07, "loss": 0.0184, "step": 11929 }, { "epoch": 2.8, "learning_rate": 2.31134260991982e-07, "loss": 0.0267, "step": 11930 }, { "epoch": 2.8, "learning_rate": 2.3059283417448673e-07, "loss": 0.0199, "step": 11931 }, { "epoch": 2.8, "learning_rate": 2.3005203484411486e-07, "loss": 0.0132, "step": 11932 }, { "epoch": 2.8, "learning_rate": 2.2951186303560302e-07, "loss": 0.0642, "step": 11933 }, { "epoch": 2.8, "learning_rate": 2.2897231878364456e-07, "loss": 0.0014, "step": 11934 }, { "epoch": 2.8, "learning_rate": 2.284334021228951e-07, "loss": 0.0122, "step": 11935 }, { "epoch": 2.8, "learning_rate": 2.2789511308796808e-07, "loss": 0.0122, "step": 11936 }, { "epoch": 2.8, "learning_rate": 2.2735745171344136e-07, "loss": 0.0018, "step": 11937 }, { "epoch": 2.8, "learning_rate": 2.2682041803384402e-07, "loss": 0.0026, "step": 11938 }, { "epoch": 2.8, "learning_rate": 2.2628401208367289e-07, "loss": 0.0013, "step": 11939 }, { "epoch": 2.8, "learning_rate": 2.2574823389737933e-07, "loss": 0.0061, "step": 11940 }, { "epoch": 2.8, "learning_rate": 2.2521308350937798e-07, "loss": 0.003, "step": 11941 }, { "epoch": 2.8, "learning_rate": 2.246785609540414e-07, "loss": 0.0037, "step": 11942 }, { "epoch": 2.8, "learning_rate": 2.2414466626569876e-07, "loss": 0.011, "step": 11943 }, { "epoch": 2.8, "learning_rate": 2.2361139947864595e-07, "loss": 0.014, "step": 11944 }, { "epoch": 2.8, "learning_rate": 2.230787606271334e-07, "loss": 0.0568, "step": 11945 }, { "epoch": 2.8, "learning_rate": 2.225467497453704e-07, "loss": 0.0089, "step": 11946 }, { "epoch": 2.8, "learning_rate": 2.2201536686752955e-07, "loss": 0.0074, "step": 11947 }, { "epoch": 2.8, "learning_rate": 2.214846120277414e-07, "loss": 0.0026, "step": 11948 }, { "epoch": 2.8, "learning_rate": 2.2095448526009867e-07, "loss": 0.001, "step": 11949 }, { "epoch": 2.8, "learning_rate": 2.204249865986463e-07, "loss": 0.0004, "step": 11950 }, { "epoch": 2.8, "learning_rate": 2.1989611607739713e-07, "loss": 0.0129, "step": 11951 }, { "epoch": 2.81, "learning_rate": 2.1936787373031842e-07, "loss": 0.0025, "step": 11952 }, { "epoch": 2.81, "learning_rate": 2.1884025959134303e-07, "loss": 0.006, "step": 11953 }, { "epoch": 2.81, "learning_rate": 2.1831327369435496e-07, "loss": 0.0045, "step": 11954 }, { "epoch": 2.81, "learning_rate": 2.177869160732049e-07, "loss": 0.0007, "step": 11955 }, { "epoch": 2.81, "learning_rate": 2.1726118676169916e-07, "loss": 0.0035, "step": 11956 }, { "epoch": 2.81, "learning_rate": 2.1673608579360738e-07, "loss": 0.0121, "step": 11957 }, { "epoch": 2.81, "learning_rate": 2.1621161320265483e-07, "loss": 0.02, "step": 11958 }, { "epoch": 2.81, "learning_rate": 2.1568776902252785e-07, "loss": 0.0003, "step": 11959 }, { "epoch": 2.81, "learning_rate": 2.1516455328687624e-07, "loss": 0.0553, "step": 11960 }, { "epoch": 2.81, "learning_rate": 2.1464196602930197e-07, "loss": 0.0372, "step": 11961 }, { "epoch": 2.81, "learning_rate": 2.141200072833738e-07, "loss": 0.0033, "step": 11962 }, { "epoch": 2.81, "learning_rate": 2.1359867708261372e-07, "loss": 0.0143, "step": 11963 }, { "epoch": 2.81, "learning_rate": 2.1307797546051057e-07, "loss": 0.0012, "step": 11964 }, { "epoch": 2.81, "learning_rate": 2.1255790245050644e-07, "loss": 0.0006, "step": 11965 }, { "epoch": 2.81, "learning_rate": 2.1203845808600577e-07, "loss": 0.044, "step": 11966 }, { "epoch": 2.81, "learning_rate": 2.1151964240037292e-07, "loss": 0.0327, "step": 11967 }, { "epoch": 2.81, "learning_rate": 2.1100145542693018e-07, "loss": 0.0159, "step": 11968 }, { "epoch": 2.81, "learning_rate": 2.1048389719896422e-07, "loss": 0.0036, "step": 11969 }, { "epoch": 2.81, "learning_rate": 2.099669677497118e-07, "loss": 0.0114, "step": 11970 }, { "epoch": 2.81, "learning_rate": 2.0945066711238083e-07, "loss": 0.0541, "step": 11971 }, { "epoch": 2.81, "learning_rate": 2.0893499532012919e-07, "loss": 0.0052, "step": 11972 }, { "epoch": 2.81, "learning_rate": 2.0841995240608149e-07, "loss": 0.0134, "step": 11973 }, { "epoch": 2.81, "learning_rate": 2.0790553840331574e-07, "loss": 0.0122, "step": 11974 }, { "epoch": 2.81, "learning_rate": 2.0739175334487326e-07, "loss": 0.0164, "step": 11975 }, { "epoch": 2.81, "learning_rate": 2.068785972637577e-07, "loss": 0.0423, "step": 11976 }, { "epoch": 2.81, "learning_rate": 2.06366070192926e-07, "loss": 0.0, "step": 11977 }, { "epoch": 2.81, "learning_rate": 2.0585417216529734e-07, "loss": 0.0088, "step": 11978 }, { "epoch": 2.81, "learning_rate": 2.0534290321375216e-07, "loss": 0.0012, "step": 11979 }, { "epoch": 2.81, "learning_rate": 2.0483226337112862e-07, "loss": 0.0013, "step": 11980 }, { "epoch": 2.81, "learning_rate": 2.0432225267022597e-07, "loss": 0.0158, "step": 11981 }, { "epoch": 2.81, "learning_rate": 2.0381287114380032e-07, "loss": 0.0084, "step": 11982 }, { "epoch": 2.81, "learning_rate": 2.0330411882457102e-07, "loss": 0.0009, "step": 11983 }, { "epoch": 2.81, "learning_rate": 2.0279599574521302e-07, "loss": 0.0061, "step": 11984 }, { "epoch": 2.81, "learning_rate": 2.022885019383658e-07, "loss": 0.0115, "step": 11985 }, { "epoch": 2.81, "learning_rate": 2.0178163743662214e-07, "loss": 0.0002, "step": 11986 }, { "epoch": 2.81, "learning_rate": 2.012754022725405e-07, "loss": 0.049, "step": 11987 }, { "epoch": 2.81, "learning_rate": 2.007697964786348e-07, "loss": 0.0136, "step": 11988 }, { "epoch": 2.81, "learning_rate": 2.0026482008738247e-07, "loss": 0.0004, "step": 11989 }, { "epoch": 2.81, "learning_rate": 1.9976047313121417e-07, "loss": 0.0027, "step": 11990 }, { "epoch": 2.81, "learning_rate": 1.9925675564252622e-07, "loss": 0.0123, "step": 11991 }, { "epoch": 2.81, "learning_rate": 1.9875366765367276e-07, "loss": 0.0328, "step": 11992 }, { "epoch": 2.81, "learning_rate": 1.9825120919696683e-07, "loss": 0.0338, "step": 11993 }, { "epoch": 2.81, "learning_rate": 1.9774938030468038e-07, "loss": 0.0002, "step": 11994 }, { "epoch": 2.82, "learning_rate": 1.972481810090465e-07, "loss": 0.0005, "step": 11995 }, { "epoch": 2.82, "learning_rate": 1.9674761134225618e-07, "loss": 0.0242, "step": 11996 }, { "epoch": 2.82, "learning_rate": 1.962476713364625e-07, "loss": 0.0372, "step": 11997 }, { "epoch": 2.82, "learning_rate": 1.9574836102377537e-07, "loss": 0.0384, "step": 11998 }, { "epoch": 2.82, "learning_rate": 1.952496804362636e-07, "loss": 0.007, "step": 11999 }, { "epoch": 2.82, "learning_rate": 1.947516296059615e-07, "loss": 0.0003, "step": 12000 }, { "epoch": 2.82, "learning_rate": 1.942542085648569e-07, "loss": 0.0002, "step": 12001 }, { "epoch": 2.82, "learning_rate": 1.937574173448964e-07, "loss": 0.0003, "step": 12002 }, { "epoch": 2.82, "learning_rate": 1.932612559779934e-07, "loss": 0.0047, "step": 12003 }, { "epoch": 2.82, "learning_rate": 1.9276572449601238e-07, "loss": 0.0067, "step": 12004 }, { "epoch": 2.82, "learning_rate": 1.9227082293078458e-07, "loss": 0.0036, "step": 12005 }, { "epoch": 2.82, "learning_rate": 1.9177655131409456e-07, "loss": 0.0034, "step": 12006 }, { "epoch": 2.82, "learning_rate": 1.9128290967769136e-07, "loss": 0.0134, "step": 12007 }, { "epoch": 2.82, "learning_rate": 1.9078989805327962e-07, "loss": 0.0006, "step": 12008 }, { "epoch": 2.82, "learning_rate": 1.9029751647252735e-07, "loss": 0.0089, "step": 12009 }, { "epoch": 2.82, "learning_rate": 1.8980576496705817e-07, "loss": 0.0025, "step": 12010 }, { "epoch": 2.82, "learning_rate": 1.8931464356845787e-07, "loss": 0.0741, "step": 12011 }, { "epoch": 2.82, "learning_rate": 1.8882415230827234e-07, "loss": 0.012, "step": 12012 }, { "epoch": 2.82, "learning_rate": 1.8833429121800417e-07, "loss": 0.0182, "step": 12013 }, { "epoch": 2.82, "learning_rate": 1.878450603291182e-07, "loss": 0.0063, "step": 12014 }, { "epoch": 2.82, "learning_rate": 1.8735645967303594e-07, "loss": 0.0003, "step": 12015 }, { "epoch": 2.82, "learning_rate": 1.868684892811423e-07, "loss": 0.0003, "step": 12016 }, { "epoch": 2.82, "learning_rate": 1.863811491847789e-07, "loss": 0.0006, "step": 12017 }, { "epoch": 2.82, "learning_rate": 1.858944394152451e-07, "loss": 0.0086, "step": 12018 }, { "epoch": 2.82, "learning_rate": 1.854083600038059e-07, "loss": 0.0013, "step": 12019 }, { "epoch": 2.82, "learning_rate": 1.849229109816797e-07, "loss": 0.0028, "step": 12020 }, { "epoch": 2.82, "learning_rate": 1.8443809238004816e-07, "loss": 0.0023, "step": 12021 }, { "epoch": 2.82, "learning_rate": 1.8395390423004867e-07, "loss": 0.0009, "step": 12022 }, { "epoch": 2.82, "learning_rate": 1.8347034656278296e-07, "loss": 0.0001, "step": 12023 }, { "epoch": 2.82, "learning_rate": 1.8298741940930843e-07, "loss": 0.0296, "step": 12024 }, { "epoch": 2.82, "learning_rate": 1.825051228006458e-07, "loss": 0.0004, "step": 12025 }, { "epoch": 2.82, "learning_rate": 1.8202345676777033e-07, "loss": 0.0004, "step": 12026 }, { "epoch": 2.82, "learning_rate": 1.8154242134161947e-07, "loss": 0.0021, "step": 12027 }, { "epoch": 2.82, "learning_rate": 1.8106201655309075e-07, "loss": 0.0375, "step": 12028 }, { "epoch": 2.82, "learning_rate": 1.8058224243304168e-07, "loss": 0.0016, "step": 12029 }, { "epoch": 2.82, "learning_rate": 1.801030990122865e-07, "loss": 0.0127, "step": 12030 }, { "epoch": 2.82, "learning_rate": 1.796245863215995e-07, "loss": 0.0366, "step": 12031 }, { "epoch": 2.82, "learning_rate": 1.791467043917172e-07, "loss": 0.0024, "step": 12032 }, { "epoch": 2.82, "learning_rate": 1.7866945325333395e-07, "loss": 0.0061, "step": 12033 }, { "epoch": 2.82, "learning_rate": 1.7819283293710298e-07, "loss": 0.0096, "step": 12034 }, { "epoch": 2.82, "learning_rate": 1.777168434736376e-07, "loss": 0.0221, "step": 12035 }, { "epoch": 2.82, "learning_rate": 1.7724148489351002e-07, "loss": 0.0085, "step": 12036 }, { "epoch": 2.83, "learning_rate": 1.7676675722725357e-07, "loss": 0.0017, "step": 12037 }, { "epoch": 2.83, "learning_rate": 1.7629266050535832e-07, "loss": 0.0002, "step": 12038 }, { "epoch": 2.83, "learning_rate": 1.7581919475827658e-07, "loss": 0.0418, "step": 12039 }, { "epoch": 2.83, "learning_rate": 1.7534636001641846e-07, "loss": 0.0076, "step": 12040 }, { "epoch": 2.83, "learning_rate": 1.7487415631015524e-07, "loss": 0.0198, "step": 12041 }, { "epoch": 2.83, "learning_rate": 1.744025836698149e-07, "loss": 0.0004, "step": 12042 }, { "epoch": 2.83, "learning_rate": 1.7393164212568537e-07, "loss": 0.006, "step": 12043 }, { "epoch": 2.83, "learning_rate": 1.7346133170801915e-07, "loss": 0.005, "step": 12044 }, { "epoch": 2.83, "learning_rate": 1.7299165244702098e-07, "loss": 0.0006, "step": 12045 }, { "epoch": 2.83, "learning_rate": 1.7252260437286005e-07, "loss": 0.0695, "step": 12046 }, { "epoch": 2.83, "learning_rate": 1.7205418751566006e-07, "loss": 0.0006, "step": 12047 }, { "epoch": 2.83, "learning_rate": 1.7158640190551135e-07, "loss": 0.0004, "step": 12048 }, { "epoch": 2.83, "learning_rate": 1.7111924757245767e-07, "loss": 0.0237, "step": 12049 }, { "epoch": 2.83, "learning_rate": 1.7065272454650507e-07, "loss": 0.0188, "step": 12050 }, { "epoch": 2.83, "learning_rate": 1.7018683285761616e-07, "loss": 0.0187, "step": 12051 }, { "epoch": 2.83, "learning_rate": 1.697215725357182e-07, "loss": 0.0003, "step": 12052 }, { "epoch": 2.83, "learning_rate": 1.6925694361069167e-07, "loss": 0.0071, "step": 12053 }, { "epoch": 2.83, "learning_rate": 1.6879294611238162e-07, "loss": 0.0177, "step": 12054 }, { "epoch": 2.83, "learning_rate": 1.6832958007059086e-07, "loss": 0.0134, "step": 12055 }, { "epoch": 2.83, "learning_rate": 1.6786684551508004e-07, "loss": 0.0064, "step": 12056 }, { "epoch": 2.83, "learning_rate": 1.6740474247556982e-07, "loss": 0.0058, "step": 12057 }, { "epoch": 2.83, "learning_rate": 1.6694327098174312e-07, "loss": 0.0146, "step": 12058 }, { "epoch": 2.83, "learning_rate": 1.6648243106323736e-07, "loss": 0.0063, "step": 12059 }, { "epoch": 2.83, "learning_rate": 1.660222227496555e-07, "loss": 0.0126, "step": 12060 }, { "epoch": 2.83, "learning_rate": 1.6556264607055617e-07, "loss": 0.0022, "step": 12061 }, { "epoch": 2.83, "learning_rate": 1.651037010554557e-07, "loss": 0.0129, "step": 12062 }, { "epoch": 2.83, "learning_rate": 1.6464538773383166e-07, "loss": 0.0003, "step": 12063 }, { "epoch": 2.83, "learning_rate": 1.6418770613512492e-07, "loss": 0.0012, "step": 12064 }, { "epoch": 2.83, "learning_rate": 1.6373065628872976e-07, "loss": 0.039, "step": 12065 }, { "epoch": 2.83, "learning_rate": 1.6327423822400268e-07, "loss": 0.0071, "step": 12066 }, { "epoch": 2.83, "learning_rate": 1.6281845197025913e-07, "loss": 0.0421, "step": 12067 }, { "epoch": 2.83, "learning_rate": 1.6236329755677572e-07, "loss": 0.0004, "step": 12068 }, { "epoch": 2.83, "learning_rate": 1.6190877501278567e-07, "loss": 0.0174, "step": 12069 }, { "epoch": 2.83, "learning_rate": 1.6145488436748124e-07, "loss": 0.0024, "step": 12070 }, { "epoch": 2.83, "learning_rate": 1.6100162565001908e-07, "loss": 0.0079, "step": 12071 }, { "epoch": 2.83, "learning_rate": 1.6054899888951036e-07, "loss": 0.001, "step": 12072 }, { "epoch": 2.83, "learning_rate": 1.600970041150274e-07, "loss": 0.0192, "step": 12073 }, { "epoch": 2.83, "learning_rate": 1.5964564135560023e-07, "loss": 0.0013, "step": 12074 }, { "epoch": 2.83, "learning_rate": 1.591949106402213e-07, "loss": 0.0002, "step": 12075 }, { "epoch": 2.83, "learning_rate": 1.5874481199784074e-07, "loss": 0.0057, "step": 12076 }, { "epoch": 2.83, "learning_rate": 1.582953454573699e-07, "loss": 0.0172, "step": 12077 }, { "epoch": 2.83, "learning_rate": 1.5784651104767456e-07, "loss": 0.0598, "step": 12078 }, { "epoch": 2.83, "learning_rate": 1.57398308797585e-07, "loss": 0.076, "step": 12079 }, { "epoch": 2.84, "learning_rate": 1.569507387358904e-07, "loss": 0.0076, "step": 12080 }, { "epoch": 2.84, "learning_rate": 1.5650380089133666e-07, "loss": 0.0003, "step": 12081 }, { "epoch": 2.84, "learning_rate": 1.560574952926308e-07, "loss": 0.0163, "step": 12082 }, { "epoch": 2.84, "learning_rate": 1.5561182196843772e-07, "loss": 0.0154, "step": 12083 }, { "epoch": 2.84, "learning_rate": 1.5516678094738557e-07, "loss": 0.0031, "step": 12084 }, { "epoch": 2.84, "learning_rate": 1.5472237225805708e-07, "loss": 0.0017, "step": 12085 }, { "epoch": 2.84, "learning_rate": 1.5427859592899608e-07, "loss": 0.0011, "step": 12086 }, { "epoch": 2.84, "learning_rate": 1.5383545198870974e-07, "loss": 0.0278, "step": 12087 }, { "epoch": 2.84, "learning_rate": 1.5339294046565755e-07, "loss": 0.0005, "step": 12088 }, { "epoch": 2.84, "learning_rate": 1.5295106138826343e-07, "loss": 0.0076, "step": 12089 }, { "epoch": 2.84, "learning_rate": 1.52509814784908e-07, "loss": 0.0353, "step": 12090 }, { "epoch": 2.84, "learning_rate": 1.5206920068393415e-07, "loss": 0.0017, "step": 12091 }, { "epoch": 2.84, "learning_rate": 1.5162921911364147e-07, "loss": 0.0202, "step": 12092 }, { "epoch": 2.84, "learning_rate": 1.5118987010228958e-07, "loss": 0.0567, "step": 12093 }, { "epoch": 2.84, "learning_rate": 1.5075115367809813e-07, "loss": 0.0003, "step": 12094 }, { "epoch": 2.84, "learning_rate": 1.5031306986924677e-07, "loss": 0.0005, "step": 12095 }, { "epoch": 2.84, "learning_rate": 1.4987561870387191e-07, "loss": 0.0672, "step": 12096 }, { "epoch": 2.84, "learning_rate": 1.4943880021007328e-07, "loss": 0.0076, "step": 12097 }, { "epoch": 2.84, "learning_rate": 1.4900261441590515e-07, "loss": 0.0001, "step": 12098 }, { "epoch": 2.84, "learning_rate": 1.4856706134938393e-07, "loss": 0.0035, "step": 12099 }, { "epoch": 2.84, "learning_rate": 1.4813214103848616e-07, "loss": 0.0052, "step": 12100 }, { "epoch": 2.84, "learning_rate": 1.476978535111462e-07, "loss": 0.0023, "step": 12101 }, { "epoch": 2.84, "learning_rate": 1.4726419879525834e-07, "loss": 0.0577, "step": 12102 }, { "epoch": 2.84, "learning_rate": 1.4683117691867698e-07, "loss": 0.0004, "step": 12103 }, { "epoch": 2.84, "learning_rate": 1.4639878790921325e-07, "loss": 0.0001, "step": 12104 }, { "epoch": 2.84, "learning_rate": 1.4596703179464156e-07, "loss": 0.0325, "step": 12105 }, { "epoch": 2.84, "learning_rate": 1.4553590860269083e-07, "loss": 0.015, "step": 12106 }, { "epoch": 2.84, "learning_rate": 1.4510541836105453e-07, "loss": 0.0033, "step": 12107 }, { "epoch": 2.84, "learning_rate": 1.4467556109738157e-07, "loss": 0.001, "step": 12108 }, { "epoch": 2.84, "learning_rate": 1.4424633683928213e-07, "loss": 0.0003, "step": 12109 }, { "epoch": 2.84, "learning_rate": 1.4381774561432414e-07, "loss": 0.0007, "step": 12110 }, { "epoch": 2.84, "learning_rate": 1.4338978745003674e-07, "loss": 0.0191, "step": 12111 }, { "epoch": 2.84, "learning_rate": 1.4296246237390897e-07, "loss": 0.0477, "step": 12112 }, { "epoch": 2.84, "learning_rate": 1.4253577041338674e-07, "loss": 0.0025, "step": 12113 }, { "epoch": 2.84, "learning_rate": 1.4210971159587582e-07, "loss": 0.027, "step": 12114 }, { "epoch": 2.84, "learning_rate": 1.4168428594874105e-07, "loss": 0.0005, "step": 12115 }, { "epoch": 2.84, "learning_rate": 1.4125949349931057e-07, "loss": 0.0242, "step": 12116 }, { "epoch": 2.84, "learning_rate": 1.4083533427486584e-07, "loss": 0.0054, "step": 12117 }, { "epoch": 2.84, "learning_rate": 1.4041180830265177e-07, "loss": 0.0022, "step": 12118 }, { "epoch": 2.84, "learning_rate": 1.3998891560986992e-07, "loss": 0.0182, "step": 12119 }, { "epoch": 2.84, "learning_rate": 1.3956665622368527e-07, "loss": 0.0266, "step": 12120 }, { "epoch": 2.84, "learning_rate": 1.3914503017121717e-07, "loss": 0.0002, "step": 12121 }, { "epoch": 2.84, "learning_rate": 1.3872403747954733e-07, "loss": 0.0005, "step": 12122 }, { "epoch": 2.85, "learning_rate": 1.3830367817571634e-07, "loss": 0.0068, "step": 12123 }, { "epoch": 2.85, "learning_rate": 1.378839522867237e-07, "loss": 0.0013, "step": 12124 }, { "epoch": 2.85, "learning_rate": 1.3746485983952896e-07, "loss": 0.0227, "step": 12125 }, { "epoch": 2.85, "learning_rate": 1.3704640086104726e-07, "loss": 0.0454, "step": 12126 }, { "epoch": 2.85, "learning_rate": 1.3662857537816045e-07, "loss": 0.0008, "step": 12127 }, { "epoch": 2.85, "learning_rate": 1.3621138341770256e-07, "loss": 0.0011, "step": 12128 }, { "epoch": 2.85, "learning_rate": 1.3579482500647001e-07, "loss": 0.0047, "step": 12129 }, { "epoch": 2.85, "learning_rate": 1.3537890017121913e-07, "loss": 0.0008, "step": 12130 }, { "epoch": 2.85, "learning_rate": 1.3496360893866412e-07, "loss": 0.0003, "step": 12131 }, { "epoch": 2.85, "learning_rate": 1.345489513354803e-07, "loss": 0.002, "step": 12132 }, { "epoch": 2.85, "learning_rate": 1.341349273882997e-07, "loss": 0.0039, "step": 12133 }, { "epoch": 2.85, "learning_rate": 1.3372153712371438e-07, "loss": 0.0006, "step": 12134 }, { "epoch": 2.85, "learning_rate": 1.3330878056827757e-07, "loss": 0.0003, "step": 12135 }, { "epoch": 2.85, "learning_rate": 1.3289665774850137e-07, "loss": 0.0132, "step": 12136 }, { "epoch": 2.85, "learning_rate": 1.3248516869085348e-07, "loss": 0.0116, "step": 12137 }, { "epoch": 2.85, "learning_rate": 1.3207431342176613e-07, "loss": 0.0003, "step": 12138 }, { "epoch": 2.85, "learning_rate": 1.316640919676282e-07, "loss": 0.0037, "step": 12139 }, { "epoch": 2.85, "learning_rate": 1.3125450435478858e-07, "loss": 0.001, "step": 12140 }, { "epoch": 2.85, "learning_rate": 1.3084555060955295e-07, "loss": 0.0001, "step": 12141 }, { "epoch": 2.85, "learning_rate": 1.3043723075818915e-07, "loss": 0.002, "step": 12142 }, { "epoch": 2.85, "learning_rate": 1.300295448269251e-07, "loss": 0.0002, "step": 12143 }, { "epoch": 2.85, "learning_rate": 1.2962249284194538e-07, "loss": 0.0335, "step": 12144 }, { "epoch": 2.85, "learning_rate": 1.2921607482939358e-07, "loss": 0.0004, "step": 12145 }, { "epoch": 2.85, "learning_rate": 1.2881029081537543e-07, "loss": 0.0152, "step": 12146 }, { "epoch": 2.85, "learning_rate": 1.2840514082595236e-07, "loss": 0.0004, "step": 12147 }, { "epoch": 2.85, "learning_rate": 1.2800062488715126e-07, "loss": 0.0253, "step": 12148 }, { "epoch": 2.85, "learning_rate": 1.2759674302495028e-07, "loss": 0.0006, "step": 12149 }, { "epoch": 2.85, "learning_rate": 1.27193495265292e-07, "loss": 0.008, "step": 12150 }, { "epoch": 2.85, "learning_rate": 1.2679088163407571e-07, "loss": 0.001, "step": 12151 }, { "epoch": 2.85, "learning_rate": 1.2638890215716294e-07, "loss": 0.0379, "step": 12152 }, { "epoch": 2.85, "learning_rate": 1.25987556860373e-07, "loss": 0.0003, "step": 12153 }, { "epoch": 2.85, "learning_rate": 1.25586845769482e-07, "loss": 0.0145, "step": 12154 }, { "epoch": 2.85, "learning_rate": 1.251867689102293e-07, "loss": 0.0019, "step": 12155 }, { "epoch": 2.85, "learning_rate": 1.2478732630831214e-07, "loss": 0.0005, "step": 12156 }, { "epoch": 2.85, "learning_rate": 1.2438851798938444e-07, "loss": 0.0003, "step": 12157 }, { "epoch": 2.85, "learning_rate": 1.239903439790635e-07, "loss": 0.0107, "step": 12158 }, { "epoch": 2.85, "learning_rate": 1.235928043029233e-07, "loss": 0.0004, "step": 12159 }, { "epoch": 2.85, "learning_rate": 1.2319589898649786e-07, "loss": 0.0012, "step": 12160 }, { "epoch": 2.85, "learning_rate": 1.2279962805528122e-07, "loss": 0.0126, "step": 12161 }, { "epoch": 2.85, "learning_rate": 1.2240399153472304e-07, "loss": 0.0457, "step": 12162 }, { "epoch": 2.85, "learning_rate": 1.2200898945023743e-07, "loss": 0.0089, "step": 12163 }, { "epoch": 2.85, "learning_rate": 1.216146218271963e-07, "loss": 0.0005, "step": 12164 }, { "epoch": 2.86, "learning_rate": 1.2122088869092609e-07, "loss": 0.0057, "step": 12165 }, { "epoch": 2.86, "learning_rate": 1.2082779006671984e-07, "loss": 0.0087, "step": 12166 }, { "epoch": 2.86, "learning_rate": 1.2043532597982298e-07, "loss": 0.0058, "step": 12167 }, { "epoch": 2.86, "learning_rate": 1.2004349645544645e-07, "loss": 0.0229, "step": 12168 }, { "epoch": 2.86, "learning_rate": 1.196523015187556e-07, "loss": 0.0048, "step": 12169 }, { "epoch": 2.86, "learning_rate": 1.192617411948782e-07, "loss": 0.0149, "step": 12170 }, { "epoch": 2.86, "learning_rate": 1.1887181550889749e-07, "loss": 0.0011, "step": 12171 }, { "epoch": 2.86, "learning_rate": 1.1848252448586007e-07, "loss": 0.0244, "step": 12172 }, { "epoch": 2.86, "learning_rate": 1.1809386815077039e-07, "loss": 0.0033, "step": 12173 }, { "epoch": 2.86, "learning_rate": 1.1770584652858963e-07, "loss": 0.0044, "step": 12174 }, { "epoch": 2.86, "learning_rate": 1.1731845964424226e-07, "loss": 0.003, "step": 12175 }, { "epoch": 2.86, "learning_rate": 1.1693170752261062e-07, "loss": 0.0074, "step": 12176 }, { "epoch": 2.86, "learning_rate": 1.1654559018853373e-07, "loss": 0.0073, "step": 12177 }, { "epoch": 2.86, "learning_rate": 1.1616010766681285e-07, "loss": 0.0007, "step": 12178 }, { "epoch": 2.86, "learning_rate": 1.1577525998220706e-07, "loss": 0.032, "step": 12179 }, { "epoch": 2.86, "learning_rate": 1.153910471594366e-07, "loss": 0.0127, "step": 12180 }, { "epoch": 2.86, "learning_rate": 1.1500746922317619e-07, "loss": 0.0019, "step": 12181 }, { "epoch": 2.86, "learning_rate": 1.1462452619806608e-07, "loss": 0.0017, "step": 12182 }, { "epoch": 2.86, "learning_rate": 1.1424221810870107e-07, "loss": 0.0018, "step": 12183 }, { "epoch": 2.86, "learning_rate": 1.1386054497963817e-07, "loss": 0.0002, "step": 12184 }, { "epoch": 2.86, "learning_rate": 1.1347950683538999e-07, "loss": 0.0037, "step": 12185 }, { "epoch": 2.86, "learning_rate": 1.1309910370043254e-07, "loss": 0.0159, "step": 12186 }, { "epoch": 2.86, "learning_rate": 1.1271933559919734e-07, "loss": 0.0025, "step": 12187 }, { "epoch": 2.86, "learning_rate": 1.1234020255607825e-07, "loss": 0.0013, "step": 12188 }, { "epoch": 2.86, "learning_rate": 1.1196170459542688e-07, "loss": 0.0012, "step": 12189 }, { "epoch": 2.86, "learning_rate": 1.1158384174155157e-07, "loss": 0.0192, "step": 12190 }, { "epoch": 2.86, "learning_rate": 1.1120661401872623e-07, "loss": 0.0019, "step": 12191 }, { "epoch": 2.86, "learning_rate": 1.1083002145117705e-07, "loss": 0.0027, "step": 12192 }, { "epoch": 2.86, "learning_rate": 1.1045406406309466e-07, "loss": 0.0117, "step": 12193 }, { "epoch": 2.86, "learning_rate": 1.1007874187862533e-07, "loss": 0.0059, "step": 12194 }, { "epoch": 2.86, "learning_rate": 1.0970405492187642e-07, "loss": 0.0024, "step": 12195 }, { "epoch": 2.86, "learning_rate": 1.0933000321691312e-07, "loss": 0.0002, "step": 12196 }, { "epoch": 2.86, "learning_rate": 1.0895658678776177e-07, "loss": 0.0052, "step": 12197 }, { "epoch": 2.86, "learning_rate": 1.0858380565840765e-07, "loss": 0.0066, "step": 12198 }, { "epoch": 2.86, "learning_rate": 1.0821165985279159e-07, "loss": 0.0337, "step": 12199 }, { "epoch": 2.86, "learning_rate": 1.0784014939482002e-07, "loss": 0.0012, "step": 12200 }, { "epoch": 2.86, "learning_rate": 1.0746927430835163e-07, "loss": 0.001, "step": 12201 }, { "epoch": 2.86, "learning_rate": 1.0709903461721072e-07, "loss": 0.0006, "step": 12202 }, { "epoch": 2.86, "learning_rate": 1.0672943034517491e-07, "loss": 0.0225, "step": 12203 }, { "epoch": 2.86, "learning_rate": 1.0636046151598522e-07, "loss": 0.0025, "step": 12204 }, { "epoch": 2.86, "learning_rate": 1.0599212815334048e-07, "loss": 0.015, "step": 12205 }, { "epoch": 2.86, "learning_rate": 1.0562443028089842e-07, "loss": 0.0089, "step": 12206 }, { "epoch": 2.86, "learning_rate": 1.0525736792227681e-07, "loss": 0.0183, "step": 12207 }, { "epoch": 2.87, "learning_rate": 1.0489094110105125e-07, "loss": 0.0002, "step": 12208 }, { "epoch": 2.87, "learning_rate": 1.0452514984075735e-07, "loss": 0.0097, "step": 12209 }, { "epoch": 2.87, "learning_rate": 1.0415999416488853e-07, "loss": 0.002, "step": 12210 }, { "epoch": 2.87, "learning_rate": 1.0379547409690161e-07, "loss": 0.006, "step": 12211 }, { "epoch": 2.87, "learning_rate": 1.0343158966020894e-07, "loss": 0.0019, "step": 12212 }, { "epoch": 2.87, "learning_rate": 1.0306834087818074e-07, "loss": 0.0006, "step": 12213 }, { "epoch": 2.87, "learning_rate": 1.0270572777414834e-07, "loss": 0.0021, "step": 12214 }, { "epoch": 2.87, "learning_rate": 1.0234375037140531e-07, "loss": 0.0192, "step": 12215 }, { "epoch": 2.87, "learning_rate": 1.0198240869319975e-07, "loss": 0.0077, "step": 12216 }, { "epoch": 2.87, "learning_rate": 1.0162170276273975e-07, "loss": 0.0011, "step": 12217 }, { "epoch": 2.87, "learning_rate": 1.0126163260319344e-07, "loss": 0.015, "step": 12218 }, { "epoch": 2.87, "learning_rate": 1.0090219823768899e-07, "loss": 0.004, "step": 12219 }, { "epoch": 2.87, "learning_rate": 1.0054339968931349e-07, "loss": 0.0028, "step": 12220 }, { "epoch": 2.87, "learning_rate": 1.0018523698111071e-07, "loss": 0.0167, "step": 12221 }, { "epoch": 2.87, "learning_rate": 9.982771013608671e-08, "loss": 0.0071, "step": 12222 }, { "epoch": 2.87, "learning_rate": 9.947081917720425e-08, "loss": 0.0169, "step": 12223 }, { "epoch": 2.87, "learning_rate": 9.911456412738718e-08, "loss": 0.0334, "step": 12224 }, { "epoch": 2.87, "learning_rate": 9.875894500951721e-08, "loss": 0.0113, "step": 12225 }, { "epoch": 2.87, "learning_rate": 9.840396184643608e-08, "loss": 0.0172, "step": 12226 }, { "epoch": 2.87, "learning_rate": 9.804961466094442e-08, "loss": 0.0001, "step": 12227 }, { "epoch": 2.87, "learning_rate": 9.769590347580183e-08, "loss": 0.0014, "step": 12228 }, { "epoch": 2.87, "learning_rate": 9.734282831372676e-08, "loss": 0.0014, "step": 12229 }, { "epoch": 2.87, "learning_rate": 9.699038919739668e-08, "loss": 0.0102, "step": 12230 }, { "epoch": 2.87, "learning_rate": 9.663858614944899e-08, "loss": 0.0017, "step": 12231 }, { "epoch": 2.87, "learning_rate": 9.628741919248119e-08, "loss": 0.0145, "step": 12232 }, { "epoch": 2.87, "learning_rate": 9.593688834904747e-08, "loss": 0.0314, "step": 12233 }, { "epoch": 2.87, "learning_rate": 9.558699364166202e-08, "loss": 0.0006, "step": 12234 }, { "epoch": 2.87, "learning_rate": 9.52377350927991e-08, "loss": 0.0009, "step": 12235 }, { "epoch": 2.87, "learning_rate": 9.488911272489299e-08, "loss": 0.0002, "step": 12236 }, { "epoch": 2.87, "learning_rate": 9.454112656033243e-08, "loss": 0.0138, "step": 12237 }, { "epoch": 2.87, "learning_rate": 9.419377662147178e-08, "loss": 0.0038, "step": 12238 }, { "epoch": 2.87, "learning_rate": 9.384706293061762e-08, "loss": 0.0094, "step": 12239 }, { "epoch": 2.87, "learning_rate": 9.350098551004327e-08, "loss": 0.0138, "step": 12240 }, { "epoch": 2.87, "learning_rate": 9.315554438197428e-08, "loss": 0.0004, "step": 12241 }, { "epoch": 2.87, "learning_rate": 9.281073956859954e-08, "loss": 0.0021, "step": 12242 }, { "epoch": 2.87, "learning_rate": 9.246657109206581e-08, "loss": 0.0167, "step": 12243 }, { "epoch": 2.87, "learning_rate": 9.212303897447872e-08, "loss": 0.0428, "step": 12244 }, { "epoch": 2.87, "learning_rate": 9.178014323790397e-08, "loss": 0.0066, "step": 12245 }, { "epoch": 2.87, "learning_rate": 9.143788390436281e-08, "loss": 0.005, "step": 12246 }, { "epoch": 2.87, "learning_rate": 9.109626099584212e-08, "loss": 0.0187, "step": 12247 }, { "epoch": 2.87, "learning_rate": 9.075527453428323e-08, "loss": 0.0003, "step": 12248 }, { "epoch": 2.87, "learning_rate": 9.04149245415864e-08, "loss": 0.0018, "step": 12249 }, { "epoch": 2.87, "learning_rate": 9.007521103961414e-08, "loss": 0.0148, "step": 12250 }, { "epoch": 2.88, "learning_rate": 8.973613405018345e-08, "loss": 0.0317, "step": 12251 }, { "epoch": 2.88, "learning_rate": 8.939769359507688e-08, "loss": 0.0036, "step": 12252 }, { "epoch": 2.88, "learning_rate": 8.90598896960293e-08, "loss": 0.0009, "step": 12253 }, { "epoch": 2.88, "learning_rate": 8.87227223747389e-08, "loss": 0.0005, "step": 12254 }, { "epoch": 2.88, "learning_rate": 8.838619165286056e-08, "loss": 0.0128, "step": 12255 }, { "epoch": 2.88, "learning_rate": 8.805029755201366e-08, "loss": 0.007, "step": 12256 }, { "epoch": 2.88, "learning_rate": 8.771504009376763e-08, "loss": 0.0053, "step": 12257 }, { "epoch": 2.88, "learning_rate": 8.738041929965746e-08, "loss": 0.0035, "step": 12258 }, { "epoch": 2.88, "learning_rate": 8.704643519117817e-08, "loss": 0.0031, "step": 12259 }, { "epoch": 2.88, "learning_rate": 8.671308778977927e-08, "loss": 0.001, "step": 12260 }, { "epoch": 2.88, "learning_rate": 8.638037711687252e-08, "loss": 0.0007, "step": 12261 }, { "epoch": 2.88, "learning_rate": 8.604830319382751e-08, "loss": 0.0034, "step": 12262 }, { "epoch": 2.88, "learning_rate": 8.571686604197271e-08, "loss": 0.0269, "step": 12263 }, { "epoch": 2.88, "learning_rate": 8.538606568259666e-08, "loss": 0.0068, "step": 12264 }, { "epoch": 2.88, "learning_rate": 8.505590213694681e-08, "loss": 0.0004, "step": 12265 }, { "epoch": 2.88, "learning_rate": 8.472637542623063e-08, "loss": 0.0182, "step": 12266 }, { "epoch": 2.88, "learning_rate": 8.43974855716101e-08, "loss": 0.0006, "step": 12267 }, { "epoch": 2.88, "learning_rate": 8.406923259421495e-08, "loss": 0.0002, "step": 12268 }, { "epoch": 2.88, "learning_rate": 8.374161651512391e-08, "loss": 0.0034, "step": 12269 }, { "epoch": 2.88, "learning_rate": 8.341463735538124e-08, "loss": 0.0019, "step": 12270 }, { "epoch": 2.88, "learning_rate": 8.308829513599015e-08, "loss": 0.0006, "step": 12271 }, { "epoch": 2.88, "learning_rate": 8.276258987791053e-08, "loss": 0.0061, "step": 12272 }, { "epoch": 2.88, "learning_rate": 8.243752160206231e-08, "loss": 0.0003, "step": 12273 }, { "epoch": 2.88, "learning_rate": 8.211309032932436e-08, "loss": 0.0003, "step": 12274 }, { "epoch": 2.88, "learning_rate": 8.178929608053554e-08, "loss": 0.0017, "step": 12275 }, { "epoch": 2.88, "learning_rate": 8.146613887649258e-08, "loss": 0.0011, "step": 12276 }, { "epoch": 2.88, "learning_rate": 8.114361873795107e-08, "loss": 0.001, "step": 12277 }, { "epoch": 2.88, "learning_rate": 8.082173568562779e-08, "loss": 0.0023, "step": 12278 }, { "epoch": 2.88, "learning_rate": 8.05004897401973e-08, "loss": 0.0023, "step": 12279 }, { "epoch": 2.88, "learning_rate": 8.0179880922292e-08, "loss": 0.0028, "step": 12280 }, { "epoch": 2.88, "learning_rate": 7.985990925250431e-08, "loss": 0.0059, "step": 12281 }, { "epoch": 2.88, "learning_rate": 7.954057475138776e-08, "loss": 0.0002, "step": 12282 }, { "epoch": 2.88, "learning_rate": 7.922187743945153e-08, "loss": 0.0493, "step": 12283 }, { "epoch": 2.88, "learning_rate": 7.890381733716701e-08, "loss": 0.0008, "step": 12284 }, { "epoch": 2.88, "learning_rate": 7.858639446496119e-08, "loss": 0.0227, "step": 12285 }, { "epoch": 2.88, "learning_rate": 7.826960884322332e-08, "loss": 0.001, "step": 12286 }, { "epoch": 2.88, "learning_rate": 7.795346049229935e-08, "loss": 0.035, "step": 12287 }, { "epoch": 2.88, "learning_rate": 7.763794943249747e-08, "loss": 0.0013, "step": 12288 }, { "epoch": 2.88, "learning_rate": 7.732307568408148e-08, "loss": 0.0506, "step": 12289 }, { "epoch": 2.88, "learning_rate": 7.700883926727632e-08, "loss": 0.0101, "step": 12290 }, { "epoch": 2.88, "learning_rate": 7.66952402022636e-08, "loss": 0.0005, "step": 12291 }, { "epoch": 2.88, "learning_rate": 7.638227850918833e-08, "loss": 0.0013, "step": 12292 }, { "epoch": 2.89, "learning_rate": 7.606995420814999e-08, "loss": 0.0001, "step": 12293 }, { "epoch": 2.89, "learning_rate": 7.575826731921032e-08, "loss": 0.015, "step": 12294 }, { "epoch": 2.89, "learning_rate": 7.544721786238773e-08, "loss": 0.0177, "step": 12295 }, { "epoch": 2.89, "learning_rate": 7.513680585766181e-08, "loss": 0.0006, "step": 12296 }, { "epoch": 2.89, "learning_rate": 7.482703132497104e-08, "loss": 0.0018, "step": 12297 }, { "epoch": 2.89, "learning_rate": 7.451789428420952e-08, "loss": 0.0114, "step": 12298 }, { "epoch": 2.89, "learning_rate": 7.42093947552358e-08, "loss": 0.0005, "step": 12299 }, { "epoch": 2.89, "learning_rate": 7.390153275786294e-08, "loss": 0.003, "step": 12300 }, { "epoch": 2.89, "learning_rate": 7.359430831186509e-08, "loss": 0.0003, "step": 12301 }, { "epoch": 2.89, "learning_rate": 7.328772143697648e-08, "loss": 0.0167, "step": 12302 }, { "epoch": 2.89, "learning_rate": 7.298177215288804e-08, "loss": 0.0125, "step": 12303 }, { "epoch": 2.89, "learning_rate": 7.267646047925181e-08, "loss": 0.0103, "step": 12304 }, { "epoch": 2.89, "learning_rate": 7.237178643567654e-08, "loss": 0.0309, "step": 12305 }, { "epoch": 2.89, "learning_rate": 7.206775004173216e-08, "loss": 0.0015, "step": 12306 }, { "epoch": 2.89, "learning_rate": 7.176435131694637e-08, "loss": 0.0002, "step": 12307 }, { "epoch": 2.89, "learning_rate": 7.14615902808069e-08, "loss": 0.0007, "step": 12308 }, { "epoch": 2.89, "learning_rate": 7.115946695276044e-08, "loss": 0.0479, "step": 12309 }, { "epoch": 2.89, "learning_rate": 7.085798135221145e-08, "loss": 0.0038, "step": 12310 }, { "epoch": 2.89, "learning_rate": 7.055713349852444e-08, "loss": 0.0211, "step": 12311 }, { "epoch": 2.89, "learning_rate": 7.025692341102286e-08, "loss": 0.0083, "step": 12312 }, { "epoch": 2.89, "learning_rate": 6.995735110899016e-08, "loss": 0.0153, "step": 12313 }, { "epoch": 2.89, "learning_rate": 6.965841661166539e-08, "loss": 0.0012, "step": 12314 }, { "epoch": 2.89, "learning_rate": 6.936011993825208e-08, "loss": 0.0005, "step": 12315 }, { "epoch": 2.89, "learning_rate": 6.906246110790826e-08, "loss": 0.0238, "step": 12316 }, { "epoch": 2.89, "learning_rate": 6.876544013975084e-08, "loss": 0.0049, "step": 12317 }, { "epoch": 2.89, "learning_rate": 6.846905705286122e-08, "loss": 0.0003, "step": 12318 }, { "epoch": 2.89, "learning_rate": 6.817331186627196e-08, "loss": 0.0008, "step": 12319 }, { "epoch": 2.89, "learning_rate": 6.78782045989823e-08, "loss": 0.0013, "step": 12320 }, { "epoch": 2.89, "learning_rate": 6.758373526994488e-08, "loss": 0.0035, "step": 12321 }, { "epoch": 2.89, "learning_rate": 6.728990389807344e-08, "loss": 0.0002, "step": 12322 }, { "epoch": 2.89, "learning_rate": 6.699671050224066e-08, "loss": 0.0267, "step": 12323 }, { "epoch": 2.89, "learning_rate": 6.670415510128037e-08, "loss": 0.0026, "step": 12324 }, { "epoch": 2.89, "learning_rate": 6.641223771397975e-08, "loss": 0.0001, "step": 12325 }, { "epoch": 2.89, "learning_rate": 6.61209583590916e-08, "loss": 0.0056, "step": 12326 }, { "epoch": 2.89, "learning_rate": 6.583031705532316e-08, "loss": 0.0013, "step": 12327 }, { "epoch": 2.89, "learning_rate": 6.554031382134396e-08, "loss": 0.0015, "step": 12328 }, { "epoch": 2.89, "learning_rate": 6.525094867577907e-08, "loss": 0.0002, "step": 12329 }, { "epoch": 2.89, "learning_rate": 6.496222163721367e-08, "loss": 0.0011, "step": 12330 }, { "epoch": 2.89, "learning_rate": 6.46741327241951e-08, "loss": 0.0411, "step": 12331 }, { "epoch": 2.89, "learning_rate": 6.438668195522635e-08, "loss": 0.0008, "step": 12332 }, { "epoch": 2.89, "learning_rate": 6.409986934877043e-08, "loss": 0.0058, "step": 12333 }, { "epoch": 2.89, "learning_rate": 6.381369492324707e-08, "loss": 0.022, "step": 12334 }, { "epoch": 2.89, "learning_rate": 6.352815869704043e-08, "loss": 0.0012, "step": 12335 }, { "epoch": 2.9, "learning_rate": 6.324326068848918e-08, "loss": 0.0002, "step": 12336 }, { "epoch": 2.9, "learning_rate": 6.295900091589092e-08, "loss": 0.0011, "step": 12337 }, { "epoch": 2.9, "learning_rate": 6.267537939750545e-08, "loss": 0.0001, "step": 12338 }, { "epoch": 2.9, "learning_rate": 6.239239615154935e-08, "loss": 0.0001, "step": 12339 }, { "epoch": 2.9, "learning_rate": 6.211005119619917e-08, "loss": 0.0116, "step": 12340 }, { "epoch": 2.9, "learning_rate": 6.182834454958708e-08, "loss": 0.0023, "step": 12341 }, { "epoch": 2.9, "learning_rate": 6.154727622980971e-08, "loss": 0.002, "step": 12342 }, { "epoch": 2.9, "learning_rate": 6.12668462549193e-08, "loss": 0.001, "step": 12343 }, { "epoch": 2.9, "learning_rate": 6.098705464292809e-08, "loss": 0.0008, "step": 12344 }, { "epoch": 2.9, "learning_rate": 6.070790141180726e-08, "loss": 0.0048, "step": 12345 }, { "epoch": 2.9, "learning_rate": 6.042938657948472e-08, "loss": 0.0005, "step": 12346 }, { "epoch": 2.9, "learning_rate": 6.015151016385278e-08, "loss": 0.0022, "step": 12347 }, { "epoch": 2.9, "learning_rate": 5.98742721827561e-08, "loss": 0.0286, "step": 12348 }, { "epoch": 2.9, "learning_rate": 5.959767265400374e-08, "loss": 0.0008, "step": 12349 }, { "epoch": 2.9, "learning_rate": 5.932171159536038e-08, "loss": 0.0026, "step": 12350 }, { "epoch": 2.9, "learning_rate": 5.904638902455295e-08, "loss": 0.0006, "step": 12351 }, { "epoch": 2.9, "learning_rate": 5.877170495926288e-08, "loss": 0.0141, "step": 12352 }, { "epoch": 2.9, "learning_rate": 5.8497659417133815e-08, "loss": 0.0187, "step": 12353 }, { "epoch": 2.9, "learning_rate": 5.8224252415768346e-08, "loss": 0.0004, "step": 12354 }, { "epoch": 2.9, "learning_rate": 5.795148397272687e-08, "loss": 0.0032, "step": 12355 }, { "epoch": 2.9, "learning_rate": 5.767935410553094e-08, "loss": 0.003, "step": 12356 }, { "epoch": 2.9, "learning_rate": 5.740786283165545e-08, "loss": 0.0036, "step": 12357 }, { "epoch": 2.9, "learning_rate": 5.713701016854201e-08, "loss": 0.0167, "step": 12358 }, { "epoch": 2.9, "learning_rate": 5.686679613358559e-08, "loss": 0.0056, "step": 12359 }, { "epoch": 2.9, "learning_rate": 5.659722074414342e-08, "loss": 0.0095, "step": 12360 }, { "epoch": 2.9, "learning_rate": 5.6328284017528326e-08, "loss": 0.0004, "step": 12361 }, { "epoch": 2.9, "learning_rate": 5.605998597101536e-08, "loss": 0.0025, "step": 12362 }, { "epoch": 2.9, "learning_rate": 5.579232662183631e-08, "loss": 0.0101, "step": 12363 }, { "epoch": 2.9, "learning_rate": 5.552530598718409e-08, "loss": 0.001, "step": 12364 }, { "epoch": 2.9, "learning_rate": 5.525892408420941e-08, "loss": 0.0607, "step": 12365 }, { "epoch": 2.9, "learning_rate": 5.499318093001971e-08, "loss": 0.0802, "step": 12366 }, { "epoch": 2.9, "learning_rate": 5.4728076541686884e-08, "loss": 0.0021, "step": 12367 }, { "epoch": 2.9, "learning_rate": 5.44636109362362e-08, "loss": 0.0008, "step": 12368 }, { "epoch": 2.9, "learning_rate": 5.419978413065408e-08, "loss": 0.0092, "step": 12369 }, { "epoch": 2.9, "learning_rate": 5.393659614188806e-08, "loss": 0.005, "step": 12370 }, { "epoch": 2.9, "learning_rate": 5.367404698684131e-08, "loss": 0.0055, "step": 12371 }, { "epoch": 2.9, "learning_rate": 5.3412136682376994e-08, "loss": 0.0302, "step": 12372 }, { "epoch": 2.9, "learning_rate": 5.3150865245318315e-08, "loss": 0.0121, "step": 12373 }, { "epoch": 2.9, "learning_rate": 5.289023269244631e-08, "loss": 0.0002, "step": 12374 }, { "epoch": 2.9, "learning_rate": 5.263023904050091e-08, "loss": 0.0082, "step": 12375 }, { "epoch": 2.9, "learning_rate": 5.23708843061832e-08, "loss": 0.0053, "step": 12376 }, { "epoch": 2.9, "learning_rate": 5.211216850614986e-08, "loss": 0.0022, "step": 12377 }, { "epoch": 2.91, "learning_rate": 5.1854091657017604e-08, "loss": 0.0023, "step": 12378 }, { "epoch": 2.91, "learning_rate": 5.159665377536538e-08, "loss": 0.0481, "step": 12379 }, { "epoch": 2.91, "learning_rate": 5.1339854877725525e-08, "loss": 0.0194, "step": 12380 }, { "epoch": 2.91, "learning_rate": 5.1083694980593735e-08, "loss": 0.018, "step": 12381 }, { "epoch": 2.91, "learning_rate": 5.08281741004224e-08, "loss": 0.0021, "step": 12382 }, { "epoch": 2.91, "learning_rate": 5.057329225362395e-08, "loss": 0.0064, "step": 12383 }, { "epoch": 2.91, "learning_rate": 5.031904945656973e-08, "loss": 0.0679, "step": 12384 }, { "epoch": 2.91, "learning_rate": 5.0065445725588915e-08, "loss": 0.0096, "step": 12385 }, { "epoch": 2.91, "learning_rate": 4.981248107697068e-08, "loss": 0.0164, "step": 12386 }, { "epoch": 2.91, "learning_rate": 4.956015552696314e-08, "loss": 0.0022, "step": 12387 }, { "epoch": 2.91, "learning_rate": 4.9308469091773334e-08, "loss": 0.0447, "step": 12388 }, { "epoch": 2.91, "learning_rate": 4.905742178756612e-08, "loss": 0.0, "step": 12389 }, { "epoch": 2.91, "learning_rate": 4.880701363046636e-08, "loss": 0.0168, "step": 12390 }, { "epoch": 2.91, "learning_rate": 4.855724463655898e-08, "loss": 0.0206, "step": 12391 }, { "epoch": 2.91, "learning_rate": 4.830811482188558e-08, "loss": 0.0013, "step": 12392 }, { "epoch": 2.91, "learning_rate": 4.805962420244781e-08, "loss": 0.0032, "step": 12393 }, { "epoch": 2.91, "learning_rate": 4.7811772794206234e-08, "loss": 0.0012, "step": 12394 }, { "epoch": 2.91, "learning_rate": 4.7564560613080344e-08, "loss": 0.0056, "step": 12395 }, { "epoch": 2.91, "learning_rate": 4.731798767494855e-08, "loss": 0.0081, "step": 12396 }, { "epoch": 2.91, "learning_rate": 4.707205399564818e-08, "loss": 0.001, "step": 12397 }, { "epoch": 2.91, "learning_rate": 4.682675959097549e-08, "loss": 0.018, "step": 12398 }, { "epoch": 2.91, "learning_rate": 4.658210447668565e-08, "loss": 0.0008, "step": 12399 }, { "epoch": 2.91, "learning_rate": 4.6338088668492764e-08, "loss": 0.0015, "step": 12400 }, { "epoch": 2.91, "learning_rate": 4.609471218206984e-08, "loss": 0.002, "step": 12401 }, { "epoch": 2.91, "learning_rate": 4.585197503304883e-08, "loss": 0.0133, "step": 12402 }, { "epoch": 2.91, "learning_rate": 4.56098772370217e-08, "loss": 0.0003, "step": 12403 }, { "epoch": 2.91, "learning_rate": 4.536841880953713e-08, "loss": 0.0002, "step": 12404 }, { "epoch": 2.91, "learning_rate": 4.512759976610381e-08, "loss": 0.0055, "step": 12405 }, { "epoch": 2.91, "learning_rate": 4.4887420122190496e-08, "loss": 0.0332, "step": 12406 }, { "epoch": 2.91, "learning_rate": 4.464787989322372e-08, "loss": 0.0012, "step": 12407 }, { "epoch": 2.91, "learning_rate": 4.4408979094588964e-08, "loss": 0.0288, "step": 12408 }, { "epoch": 2.91, "learning_rate": 4.417071774163062e-08, "loss": 0.0019, "step": 12409 }, { "epoch": 2.91, "learning_rate": 4.3933095849651994e-08, "loss": 0.0033, "step": 12410 }, { "epoch": 2.91, "learning_rate": 4.3696113433915334e-08, "loss": 0.006, "step": 12411 }, { "epoch": 2.91, "learning_rate": 4.3459770509641784e-08, "loss": 0.018, "step": 12412 }, { "epoch": 2.91, "learning_rate": 4.3224067092013654e-08, "loss": 0.0193, "step": 12413 }, { "epoch": 2.91, "learning_rate": 4.298900319616772e-08, "loss": 0.001, "step": 12414 }, { "epoch": 2.91, "learning_rate": 4.275457883720302e-08, "loss": 0.0134, "step": 12415 }, { "epoch": 2.91, "learning_rate": 4.25207940301775e-08, "loss": 0.0007, "step": 12416 }, { "epoch": 2.91, "learning_rate": 4.228764879010583e-08, "loss": 0.0147, "step": 12417 }, { "epoch": 2.91, "learning_rate": 4.205514313196268e-08, "loss": 0.0006, "step": 12418 }, { "epoch": 2.91, "learning_rate": 4.182327707068279e-08, "loss": 0.0025, "step": 12419 }, { "epoch": 2.91, "learning_rate": 4.1592050621157565e-08, "loss": 0.0082, "step": 12420 }, { "epoch": 2.92, "learning_rate": 4.136146379824069e-08, "loss": 0.0017, "step": 12421 }, { "epoch": 2.92, "learning_rate": 4.1131516616741416e-08, "loss": 0.0224, "step": 12422 }, { "epoch": 2.92, "learning_rate": 4.0902209091429056e-08, "loss": 0.0037, "step": 12423 }, { "epoch": 2.92, "learning_rate": 4.067354123703182e-08, "loss": 0.0019, "step": 12424 }, { "epoch": 2.92, "learning_rate": 4.0445513068237964e-08, "loss": 0.0051, "step": 12425 }, { "epoch": 2.92, "learning_rate": 4.021812459969354e-08, "loss": 0.004, "step": 12426 }, { "epoch": 2.92, "learning_rate": 3.9991375846003545e-08, "loss": 0.0003, "step": 12427 }, { "epoch": 2.92, "learning_rate": 3.976526682173076e-08, "loss": 0.0036, "step": 12428 }, { "epoch": 2.92, "learning_rate": 3.953979754139914e-08, "loss": 0.0017, "step": 12429 }, { "epoch": 2.92, "learning_rate": 3.9314968019491527e-08, "loss": 0.0038, "step": 12430 }, { "epoch": 2.92, "learning_rate": 3.909077827044749e-08, "loss": 0.0116, "step": 12431 }, { "epoch": 2.92, "learning_rate": 3.886722830866663e-08, "loss": 0.0032, "step": 12432 }, { "epoch": 2.92, "learning_rate": 3.864431814850744e-08, "loss": 0.0168, "step": 12433 }, { "epoch": 2.92, "learning_rate": 3.842204780428738e-08, "loss": 0.0104, "step": 12434 }, { "epoch": 2.92, "learning_rate": 3.820041729028501e-08, "loss": 0.0222, "step": 12435 }, { "epoch": 2.92, "learning_rate": 3.797942662073228e-08, "loss": 0.0064, "step": 12436 }, { "epoch": 2.92, "learning_rate": 3.775907580982563e-08, "loss": 0.0293, "step": 12437 }, { "epoch": 2.92, "learning_rate": 3.7539364871717055e-08, "loss": 0.0161, "step": 12438 }, { "epoch": 2.92, "learning_rate": 3.73202938205186e-08, "loss": 0.0014, "step": 12439 }, { "epoch": 2.92, "learning_rate": 3.7101862670302354e-08, "loss": 0.0025, "step": 12440 }, { "epoch": 2.92, "learning_rate": 3.6884071435095984e-08, "loss": 0.0002, "step": 12441 }, { "epoch": 2.92, "learning_rate": 3.66669201288905e-08, "loss": 0.0007, "step": 12442 }, { "epoch": 2.92, "learning_rate": 3.645040876563144e-08, "loss": 0.0047, "step": 12443 }, { "epoch": 2.92, "learning_rate": 3.623453735922766e-08, "loss": 0.0002, "step": 12444 }, { "epoch": 2.92, "learning_rate": 3.601930592354142e-08, "loss": 0.0006, "step": 12445 }, { "epoch": 2.92, "learning_rate": 3.580471447239942e-08, "loss": 0.0003, "step": 12446 }, { "epoch": 2.92, "learning_rate": 3.559076301958508e-08, "loss": 0.0473, "step": 12447 }, { "epoch": 2.92, "learning_rate": 3.537745157883854e-08, "loss": 0.0002, "step": 12448 }, { "epoch": 2.92, "learning_rate": 3.5164780163861045e-08, "loss": 0.0106, "step": 12449 }, { "epoch": 2.92, "learning_rate": 3.49527487883139e-08, "loss": 0.0002, "step": 12450 }, { "epoch": 2.92, "learning_rate": 3.4741357465815087e-08, "loss": 0.0008, "step": 12451 }, { "epoch": 2.92, "learning_rate": 3.453060620994264e-08, "loss": 0.0255, "step": 12452 }, { "epoch": 2.92, "learning_rate": 3.43204950342324e-08, "loss": 0.0069, "step": 12453 }, { "epoch": 2.92, "learning_rate": 3.4111023952179135e-08, "loss": 0.0003, "step": 12454 }, { "epoch": 2.92, "learning_rate": 3.3902192977238736e-08, "loss": 0.0085, "step": 12455 }, { "epoch": 2.92, "learning_rate": 3.36940021228227e-08, "loss": 0.0105, "step": 12456 }, { "epoch": 2.92, "learning_rate": 3.348645140230478e-08, "loss": 0.0124, "step": 12457 }, { "epoch": 2.92, "learning_rate": 3.32795408290143e-08, "loss": 0.012, "step": 12458 }, { "epoch": 2.92, "learning_rate": 3.3073270416242864e-08, "loss": 0.0338, "step": 12459 }, { "epoch": 2.92, "learning_rate": 3.286764017723876e-08, "loss": 0.0014, "step": 12460 }, { "epoch": 2.92, "learning_rate": 3.2662650125206975e-08, "loss": 0.0013, "step": 12461 }, { "epoch": 2.92, "learning_rate": 3.245830027331809e-08, "loss": 0.0006, "step": 12462 }, { "epoch": 2.92, "learning_rate": 3.225459063469494e-08, "loss": 0.0438, "step": 12463 }, { "epoch": 2.93, "learning_rate": 3.20515212224215e-08, "loss": 0.006, "step": 12464 }, { "epoch": 2.93, "learning_rate": 3.1849092049541785e-08, "loss": 0.0066, "step": 12465 }, { "epoch": 2.93, "learning_rate": 3.164730312905762e-08, "loss": 0.0006, "step": 12466 }, { "epoch": 2.93, "learning_rate": 3.144615447393085e-08, "loss": 0.0002, "step": 12467 }, { "epoch": 2.93, "learning_rate": 3.1245646097078916e-08, "loss": 0.0072, "step": 12468 }, { "epoch": 2.93, "learning_rate": 3.104577801138264e-08, "loss": 0.0019, "step": 12469 }, { "epoch": 2.93, "learning_rate": 3.084655022967842e-08, "loss": 0.0176, "step": 12470 }, { "epoch": 2.93, "learning_rate": 3.0647962764762674e-08, "loss": 0.0011, "step": 12471 }, { "epoch": 2.93, "learning_rate": 3.045001562939076e-08, "loss": 0.0137, "step": 12472 }, { "epoch": 2.93, "learning_rate": 3.025270883627696e-08, "loss": 0.0051, "step": 12473 }, { "epoch": 2.93, "learning_rate": 3.0056042398093344e-08, "loss": 0.0041, "step": 12474 }, { "epoch": 2.93, "learning_rate": 2.986001632747315e-08, "loss": 0.0044, "step": 12475 }, { "epoch": 2.93, "learning_rate": 2.966463063700631e-08, "loss": 0.0388, "step": 12476 }, { "epoch": 2.93, "learning_rate": 2.9469885339242775e-08, "loss": 0.036, "step": 12477 }, { "epoch": 2.93, "learning_rate": 2.927578044669033e-08, "loss": 0.0002, "step": 12478 }, { "epoch": 2.93, "learning_rate": 2.9082315971817877e-08, "loss": 0.0021, "step": 12479 }, { "epoch": 2.93, "learning_rate": 2.8889491927048817e-08, "loss": 0.0046, "step": 12480 }, { "epoch": 2.93, "learning_rate": 2.869730832477102e-08, "loss": 0.0418, "step": 12481 }, { "epoch": 2.93, "learning_rate": 2.850576517732684e-08, "loss": 0.0038, "step": 12482 }, { "epoch": 2.93, "learning_rate": 2.8314862497019758e-08, "loss": 0.0372, "step": 12483 }, { "epoch": 2.93, "learning_rate": 2.8124600296109973e-08, "loss": 0.0324, "step": 12484 }, { "epoch": 2.93, "learning_rate": 2.7934978586819927e-08, "loss": 0.0351, "step": 12485 }, { "epoch": 2.93, "learning_rate": 2.774599738132877e-08, "loss": 0.01, "step": 12486 }, { "epoch": 2.93, "learning_rate": 2.755765669177346e-08, "loss": 0.0002, "step": 12487 }, { "epoch": 2.93, "learning_rate": 2.7369956530252097e-08, "loss": 0.0074, "step": 12488 }, { "epoch": 2.93, "learning_rate": 2.7182896908819477e-08, "loss": 0.0012, "step": 12489 }, { "epoch": 2.93, "learning_rate": 2.6996477839491554e-08, "loss": 0.0067, "step": 12490 }, { "epoch": 2.93, "learning_rate": 2.6810699334242074e-08, "loss": 0.0021, "step": 12491 }, { "epoch": 2.93, "learning_rate": 2.662556140500261e-08, "loss": 0.0017, "step": 12492 }, { "epoch": 2.93, "learning_rate": 2.6441064063665867e-08, "loss": 0.0185, "step": 12493 }, { "epoch": 2.93, "learning_rate": 2.6257207322080147e-08, "loss": 0.0002, "step": 12494 }, { "epoch": 2.93, "learning_rate": 2.6073991192057114e-08, "loss": 0.0181, "step": 12495 }, { "epoch": 2.93, "learning_rate": 2.5891415685361797e-08, "loss": 0.0015, "step": 12496 }, { "epoch": 2.93, "learning_rate": 2.57094808137226e-08, "loss": 0.0149, "step": 12497 }, { "epoch": 2.93, "learning_rate": 2.552818658882572e-08, "loss": 0.0002, "step": 12498 }, { "epoch": 2.93, "learning_rate": 2.5347533022314074e-08, "loss": 0.0003, "step": 12499 }, { "epoch": 2.93, "learning_rate": 2.516752012579282e-08, "loss": 0.0027, "step": 12500 }, { "epoch": 2.93, "learning_rate": 2.4988147910822713e-08, "loss": 0.0058, "step": 12501 }, { "epoch": 2.93, "learning_rate": 2.4809416388924536e-08, "loss": 0.0109, "step": 12502 }, { "epoch": 2.93, "learning_rate": 2.463132557158021e-08, "loss": 0.0104, "step": 12503 }, { "epoch": 2.93, "learning_rate": 2.445387547022615e-08, "loss": 0.0248, "step": 12504 }, { "epoch": 2.93, "learning_rate": 2.4277066096261014e-08, "loss": 0.0027, "step": 12505 }, { "epoch": 2.94, "learning_rate": 2.4100897461041273e-08, "loss": 0.0023, "step": 12506 }, { "epoch": 2.94, "learning_rate": 2.3925369575882318e-08, "loss": 0.003, "step": 12507 }, { "epoch": 2.94, "learning_rate": 2.3750482452058465e-08, "loss": 0.0005, "step": 12508 }, { "epoch": 2.94, "learning_rate": 2.3576236100801842e-08, "loss": 0.0006, "step": 12509 }, { "epoch": 2.94, "learning_rate": 2.3402630533304605e-08, "loss": 0.0014, "step": 12510 }, { "epoch": 2.94, "learning_rate": 2.3229665760717834e-08, "loss": 0.0249, "step": 12511 }, { "epoch": 2.94, "learning_rate": 2.3057341794150423e-08, "loss": 0.0072, "step": 12512 }, { "epoch": 2.94, "learning_rate": 2.2885658644671294e-08, "loss": 0.0042, "step": 12513 }, { "epoch": 2.94, "learning_rate": 2.2714616323306072e-08, "loss": 0.007, "step": 12514 }, { "epoch": 2.94, "learning_rate": 2.2544214841043745e-08, "loss": 0.0145, "step": 12515 }, { "epoch": 2.94, "learning_rate": 2.2374454208826667e-08, "loss": 0.0009, "step": 12516 }, { "epoch": 2.94, "learning_rate": 2.2205334437558344e-08, "loss": 0.0003, "step": 12517 }, { "epoch": 2.94, "learning_rate": 2.203685553810342e-08, "loss": 0.0007, "step": 12518 }, { "epoch": 2.94, "learning_rate": 2.1869017521282122e-08, "loss": 0.0651, "step": 12519 }, { "epoch": 2.94, "learning_rate": 2.1701820397873607e-08, "loss": 0.0253, "step": 12520 }, { "epoch": 2.94, "learning_rate": 2.1535264178619286e-08, "loss": 0.0057, "step": 12521 }, { "epoch": 2.94, "learning_rate": 2.1369348874215045e-08, "loss": 0.0004, "step": 12522 }, { "epoch": 2.94, "learning_rate": 2.1204074495317918e-08, "loss": 0.0008, "step": 12523 }, { "epoch": 2.94, "learning_rate": 2.103944105254385e-08, "loss": 0.001, "step": 12524 }, { "epoch": 2.94, "learning_rate": 2.0875448556466617e-08, "loss": 0.0021, "step": 12525 }, { "epoch": 2.94, "learning_rate": 2.0712097017621113e-08, "loss": 0.0063, "step": 12526 }, { "epoch": 2.94, "learning_rate": 2.0549386446497842e-08, "loss": 0.0036, "step": 12527 }, { "epoch": 2.94, "learning_rate": 2.0387316853547333e-08, "loss": 0.003, "step": 12528 }, { "epoch": 2.94, "learning_rate": 2.022588824918015e-08, "loss": 0.0065, "step": 12529 }, { "epoch": 2.94, "learning_rate": 2.0065100643764657e-08, "loss": 0.0239, "step": 12530 }, { "epoch": 2.94, "learning_rate": 1.9904954047629265e-08, "loss": 0.0028, "step": 12531 }, { "epoch": 2.94, "learning_rate": 1.974544847105797e-08, "loss": 0.0001, "step": 12532 }, { "epoch": 2.94, "learning_rate": 1.958658392429702e-08, "loss": 0.0255, "step": 12533 }, { "epoch": 2.94, "learning_rate": 1.9428360417550472e-08, "loss": 0.0025, "step": 12534 }, { "epoch": 2.94, "learning_rate": 1.9270777960980203e-08, "loss": 0.0022, "step": 12535 }, { "epoch": 2.94, "learning_rate": 1.911383656470811e-08, "loss": 0.0162, "step": 12536 }, { "epoch": 2.94, "learning_rate": 1.895753623881502e-08, "loss": 0.0102, "step": 12537 }, { "epoch": 2.94, "learning_rate": 1.8801876993339574e-08, "loss": 0.0221, "step": 12538 }, { "epoch": 2.94, "learning_rate": 1.8646858838279324e-08, "loss": 0.0025, "step": 12539 }, { "epoch": 2.94, "learning_rate": 1.8492481783591866e-08, "loss": 0.0133, "step": 12540 }, { "epoch": 2.94, "learning_rate": 1.8338745839191485e-08, "loss": 0.0005, "step": 12541 }, { "epoch": 2.94, "learning_rate": 1.818565101495473e-08, "loss": 0.0014, "step": 12542 }, { "epoch": 2.94, "learning_rate": 1.8033197320713736e-08, "loss": 0.0153, "step": 12543 }, { "epoch": 2.94, "learning_rate": 1.7881384766259558e-08, "loss": 0.006, "step": 12544 }, { "epoch": 2.94, "learning_rate": 1.7730213361344397e-08, "loss": 0.0116, "step": 12545 }, { "epoch": 2.94, "learning_rate": 1.7579683115678258e-08, "loss": 0.0034, "step": 12546 }, { "epoch": 2.94, "learning_rate": 1.742979403892897e-08, "loss": 0.0044, "step": 12547 }, { "epoch": 2.94, "learning_rate": 1.7280546140723276e-08, "loss": 0.0165, "step": 12548 }, { "epoch": 2.95, "learning_rate": 1.713193943064795e-08, "loss": 0.0019, "step": 12549 }, { "epoch": 2.95, "learning_rate": 1.6983973918248685e-08, "loss": 0.0004, "step": 12550 }, { "epoch": 2.95, "learning_rate": 1.6836649613027887e-08, "loss": 0.0075, "step": 12551 }, { "epoch": 2.95, "learning_rate": 1.6689966524449097e-08, "loss": 0.0206, "step": 12552 }, { "epoch": 2.95, "learning_rate": 1.6543924661933664e-08, "loss": 0.0059, "step": 12553 }, { "epoch": 2.95, "learning_rate": 1.6398524034861862e-08, "loss": 0.058, "step": 12554 }, { "epoch": 2.95, "learning_rate": 1.625376465257289e-08, "loss": 0.0001, "step": 12555 }, { "epoch": 2.95, "learning_rate": 1.6109646524363753e-08, "loss": 0.0133, "step": 12556 }, { "epoch": 2.95, "learning_rate": 1.596616965949149e-08, "loss": 0.0197, "step": 12557 }, { "epoch": 2.95, "learning_rate": 1.5823334067172068e-08, "loss": 0.011, "step": 12558 }, { "epoch": 2.95, "learning_rate": 1.5681139756579256e-08, "loss": 0.002, "step": 12559 }, { "epoch": 2.95, "learning_rate": 1.5539586736846857e-08, "loss": 0.012, "step": 12560 }, { "epoch": 2.95, "learning_rate": 1.539867501706649e-08, "loss": 0.0009, "step": 12561 }, { "epoch": 2.95, "learning_rate": 1.525840460628869e-08, "loss": 0.005, "step": 12562 }, { "epoch": 2.95, "learning_rate": 1.511877551352292e-08, "loss": 0.0149, "step": 12563 }, { "epoch": 2.95, "learning_rate": 1.4979787747737562e-08, "loss": 0.0156, "step": 12564 }, { "epoch": 2.95, "learning_rate": 1.4841441317859917e-08, "loss": 0.0006, "step": 12565 }, { "epoch": 2.95, "learning_rate": 1.47037362327751e-08, "loss": 0.0004, "step": 12566 }, { "epoch": 2.95, "learning_rate": 1.456667250133048e-08, "loss": 0.0032, "step": 12567 }, { "epoch": 2.95, "learning_rate": 1.4430250132326795e-08, "loss": 0.0091, "step": 12568 }, { "epoch": 2.95, "learning_rate": 1.4294469134527034e-08, "loss": 0.0179, "step": 12569 }, { "epoch": 2.95, "learning_rate": 1.4159329516654218e-08, "loss": 0.0003, "step": 12570 }, { "epoch": 2.95, "learning_rate": 1.4024831287385854e-08, "loss": 0.0179, "step": 12571 }, { "epoch": 2.95, "learning_rate": 1.3890974455361695e-08, "loss": 0.0138, "step": 12572 }, { "epoch": 2.95, "learning_rate": 1.3757759029180418e-08, "loss": 0.0036, "step": 12573 }, { "epoch": 2.95, "learning_rate": 1.3625185017396292e-08, "loss": 0.0016, "step": 12574 }, { "epoch": 2.95, "learning_rate": 1.3493252428526948e-08, "loss": 0.0011, "step": 12575 }, { "epoch": 2.95, "learning_rate": 1.33619612710445e-08, "loss": 0.0023, "step": 12576 }, { "epoch": 2.95, "learning_rate": 1.3231311553382198e-08, "loss": 0.0032, "step": 12577 }, { "epoch": 2.95, "learning_rate": 1.310130328393222e-08, "loss": 0.0001, "step": 12578 }, { "epoch": 2.95, "learning_rate": 1.2971936471044556e-08, "loss": 0.0011, "step": 12579 }, { "epoch": 2.95, "learning_rate": 1.284321112302922e-08, "loss": 0.003, "step": 12580 }, { "epoch": 2.95, "learning_rate": 1.2715127248152937e-08, "loss": 0.0324, "step": 12581 }, { "epoch": 2.95, "learning_rate": 1.2587684854642456e-08, "loss": 0.0004, "step": 12582 }, { "epoch": 2.95, "learning_rate": 1.2460883950684567e-08, "loss": 0.0011, "step": 12583 }, { "epoch": 2.95, "learning_rate": 1.233472454442275e-08, "loss": 0.0003, "step": 12584 }, { "epoch": 2.95, "learning_rate": 1.2209206643960525e-08, "loss": 0.0001, "step": 12585 }, { "epoch": 2.95, "learning_rate": 1.2084330257359222e-08, "loss": 0.0181, "step": 12586 }, { "epoch": 2.95, "learning_rate": 1.1960095392641313e-08, "loss": 0.0374, "step": 12587 }, { "epoch": 2.95, "learning_rate": 1.183650205778486e-08, "loss": 0.0233, "step": 12588 }, { "epoch": 2.95, "learning_rate": 1.1713550260729068e-08, "loss": 0.0004, "step": 12589 }, { "epoch": 2.95, "learning_rate": 1.1591240009370952e-08, "loss": 0.0898, "step": 12590 }, { "epoch": 2.96, "learning_rate": 1.1469571311566452e-08, "loss": 0.0091, "step": 12591 }, { "epoch": 2.96, "learning_rate": 1.1348544175129318e-08, "loss": 0.0034, "step": 12592 }, { "epoch": 2.96, "learning_rate": 1.1228158607833328e-08, "loss": 0.0013, "step": 12593 }, { "epoch": 2.96, "learning_rate": 1.11084146174123e-08, "loss": 0.0153, "step": 12594 }, { "epoch": 2.96, "learning_rate": 1.0989312211556746e-08, "loss": 0.0044, "step": 12595 }, { "epoch": 2.96, "learning_rate": 1.0870851397916105e-08, "loss": 0.0009, "step": 12596 }, { "epoch": 2.96, "learning_rate": 1.0753032184099842e-08, "loss": 0.0031, "step": 12597 }, { "epoch": 2.96, "learning_rate": 1.0635854577674132e-08, "loss": 0.0002, "step": 12598 }, { "epoch": 2.96, "learning_rate": 1.0519318586166283e-08, "loss": 0.0158, "step": 12599 }, { "epoch": 2.96, "learning_rate": 1.040342421706142e-08, "loss": 0.0003, "step": 12600 }, { "epoch": 2.96, "learning_rate": 1.0288171477803589e-08, "loss": 0.0003, "step": 12601 }, { "epoch": 2.96, "learning_rate": 1.0173560375793535e-08, "loss": 0.0021, "step": 12602 }, { "epoch": 2.96, "learning_rate": 1.0059590918395368e-08, "loss": 0.0004, "step": 12603 }, { "epoch": 2.96, "learning_rate": 9.94626311292768e-09, "loss": 0.0068, "step": 12604 }, { "epoch": 2.96, "learning_rate": 9.83357696667131e-09, "loss": 0.0763, "step": 12605 }, { "epoch": 2.96, "learning_rate": 9.721532486861585e-09, "loss": 0.029, "step": 12606 }, { "epoch": 2.96, "learning_rate": 9.610129680696079e-09, "loss": 0.0186, "step": 12607 }, { "epoch": 2.96, "learning_rate": 9.49936855533018e-09, "loss": 0.0017, "step": 12608 }, { "epoch": 2.96, "learning_rate": 9.389249117879306e-09, "loss": 0.0015, "step": 12609 }, { "epoch": 2.96, "learning_rate": 9.279771375414471e-09, "loss": 0.0002, "step": 12610 }, { "epoch": 2.96, "learning_rate": 9.170935334968934e-09, "loss": 0.0077, "step": 12611 }, { "epoch": 2.96, "learning_rate": 9.06274100353155e-09, "loss": 0.0053, "step": 12612 }, { "epoch": 2.96, "learning_rate": 8.955188388053426e-09, "loss": 0.0011, "step": 12613 }, { "epoch": 2.96, "learning_rate": 8.84827749544126e-09, "loss": 0.008, "step": 12614 }, { "epoch": 2.96, "learning_rate": 8.74200833256289e-09, "loss": 0.0067, "step": 12615 }, { "epoch": 2.96, "learning_rate": 8.636380906243969e-09, "loss": 0.0038, "step": 12616 }, { "epoch": 2.96, "learning_rate": 8.531395223267958e-09, "loss": 0.0016, "step": 12617 }, { "epoch": 2.96, "learning_rate": 8.427051290378352e-09, "loss": 0.0257, "step": 12618 }, { "epoch": 2.96, "learning_rate": 8.323349114277567e-09, "loss": 0.0007, "step": 12619 }, { "epoch": 2.96, "learning_rate": 8.220288701626945e-09, "loss": 0.0016, "step": 12620 }, { "epoch": 2.96, "learning_rate": 8.11787005904452e-09, "loss": 0.0015, "step": 12621 }, { "epoch": 2.96, "learning_rate": 8.016093193109475e-09, "loss": 0.0129, "step": 12622 }, { "epoch": 2.96, "learning_rate": 7.914958110358806e-09, "loss": 0.0005, "step": 12623 }, { "epoch": 2.96, "learning_rate": 7.814464817288424e-09, "loss": 0.0024, "step": 12624 }, { "epoch": 2.96, "learning_rate": 7.71461332035428e-09, "loss": 0.0278, "step": 12625 }, { "epoch": 2.96, "learning_rate": 7.6154036259668e-09, "loss": 0.0309, "step": 12626 }, { "epoch": 2.96, "learning_rate": 7.516835740500883e-09, "loss": 0.0142, "step": 12627 }, { "epoch": 2.96, "learning_rate": 7.4189096702870225e-09, "loss": 0.0, "step": 12628 }, { "epoch": 2.96, "learning_rate": 7.32162542161352e-09, "loss": 0.0091, "step": 12629 }, { "epoch": 2.96, "learning_rate": 7.224983000729824e-09, "loss": 0.0003, "step": 12630 }, { "epoch": 2.96, "learning_rate": 7.1289824138442984e-09, "loss": 0.0197, "step": 12631 }, { "epoch": 2.96, "learning_rate": 7.033623667122014e-09, "loss": 0.0003, "step": 12632 }, { "epoch": 2.96, "learning_rate": 6.93890676668807e-09, "loss": 0.002, "step": 12633 }, { "epoch": 2.97, "learning_rate": 6.844831718625378e-09, "loss": 0.0032, "step": 12634 }, { "epoch": 2.97, "learning_rate": 6.751398528977992e-09, "loss": 0.0173, "step": 12635 }, { "epoch": 2.97, "learning_rate": 6.65860720374556e-09, "loss": 0.0005, "step": 12636 }, { "epoch": 2.97, "learning_rate": 6.566457748888866e-09, "loss": 0.0019, "step": 12637 }, { "epoch": 2.97, "learning_rate": 6.4749501703254e-09, "loss": 0.0005, "step": 12638 }, { "epoch": 2.97, "learning_rate": 6.3840844739349036e-09, "loss": 0.0143, "step": 12639 }, { "epoch": 2.97, "learning_rate": 6.293860665551599e-09, "loss": 0.0152, "step": 12640 }, { "epoch": 2.97, "learning_rate": 6.2042787509719596e-09, "loss": 0.0018, "step": 12641 }, { "epoch": 2.97, "learning_rate": 6.115338735949161e-09, "loss": 0.0094, "step": 12642 }, { "epoch": 2.97, "learning_rate": 6.027040626196412e-09, "loss": 0.0041, "step": 12643 }, { "epoch": 2.97, "learning_rate": 5.939384427383621e-09, "loss": 0.0013, "step": 12644 }, { "epoch": 2.97, "learning_rate": 5.852370145141839e-09, "loss": 0.002, "step": 12645 }, { "epoch": 2.97, "learning_rate": 5.765997785059929e-09, "loss": 0.0004, "step": 12646 }, { "epoch": 2.97, "learning_rate": 5.6802673526867855e-09, "loss": 0.0059, "step": 12647 }, { "epoch": 2.97, "learning_rate": 5.595178853526894e-09, "loss": 0.0011, "step": 12648 }, { "epoch": 2.97, "learning_rate": 5.510732293045884e-09, "loss": 0.0022, "step": 12649 }, { "epoch": 2.97, "learning_rate": 5.4269276766694135e-09, "loss": 0.0048, "step": 12650 }, { "epoch": 2.97, "learning_rate": 5.343765009777624e-09, "loss": 0.0006, "step": 12651 }, { "epoch": 2.97, "learning_rate": 5.261244297714019e-09, "loss": 0.001, "step": 12652 }, { "epoch": 2.97, "learning_rate": 5.1793655457776924e-09, "loss": 0.0313, "step": 12653 }, { "epoch": 2.97, "learning_rate": 5.098128759228882e-09, "loss": 0.0052, "step": 12654 }, { "epoch": 2.97, "learning_rate": 5.017533943284525e-09, "loss": 0.0224, "step": 12655 }, { "epoch": 2.97, "learning_rate": 4.937581103121592e-09, "loss": 0.0012, "step": 12656 }, { "epoch": 2.97, "learning_rate": 4.858270243875973e-09, "loss": 0.0003, "step": 12657 }, { "epoch": 2.97, "learning_rate": 4.779601370640263e-09, "loss": 0.0222, "step": 12658 }, { "epoch": 2.97, "learning_rate": 4.701574488468197e-09, "loss": 0.0021, "step": 12659 }, { "epoch": 2.97, "learning_rate": 4.624189602371321e-09, "loss": 0.0027, "step": 12660 }, { "epoch": 2.97, "learning_rate": 4.5474467173212136e-09, "loss": 0.0163, "step": 12661 }, { "epoch": 2.97, "learning_rate": 4.471345838245045e-09, "loss": 0.0004, "step": 12662 }, { "epoch": 2.97, "learning_rate": 4.395886970032237e-09, "loss": 0.0112, "step": 12663 }, { "epoch": 2.97, "learning_rate": 4.321070117528914e-09, "loss": 0.0004, "step": 12664 }, { "epoch": 2.97, "learning_rate": 4.24689528554012e-09, "loss": 0.0069, "step": 12665 }, { "epoch": 2.97, "learning_rate": 4.173362478830934e-09, "loss": 0.0085, "step": 12666 }, { "epoch": 2.97, "learning_rate": 4.100471702124242e-09, "loss": 0.0132, "step": 12667 }, { "epoch": 2.97, "learning_rate": 4.028222960101858e-09, "loss": 0.0002, "step": 12668 }, { "epoch": 2.97, "learning_rate": 3.956616257403401e-09, "loss": 0.0006, "step": 12669 }, { "epoch": 2.97, "learning_rate": 3.885651598629636e-09, "loss": 0.0005, "step": 12670 }, { "epoch": 2.97, "learning_rate": 3.815328988336919e-09, "loss": 0.001, "step": 12671 }, { "epoch": 2.97, "learning_rate": 3.745648431042748e-09, "loss": 0.0387, "step": 12672 }, { "epoch": 2.97, "learning_rate": 3.6766099312235404e-09, "loss": 0.0096, "step": 12673 }, { "epoch": 2.97, "learning_rate": 3.608213493312418e-09, "loss": 0.0253, "step": 12674 }, { "epoch": 2.97, "learning_rate": 3.540459121703643e-09, "loss": 0.001, "step": 12675 }, { "epoch": 2.97, "learning_rate": 3.47334682074707e-09, "loss": 0.0048, "step": 12676 }, { "epoch": 2.98, "learning_rate": 3.4068765947559147e-09, "loss": 0.0032, "step": 12677 }, { "epoch": 2.98, "learning_rate": 3.3410484479978743e-09, "loss": 0.0014, "step": 12678 }, { "epoch": 2.98, "learning_rate": 3.275862384701789e-09, "loss": 0.0014, "step": 12679 }, { "epoch": 2.98, "learning_rate": 3.211318409053199e-09, "loss": 0.0023, "step": 12680 }, { "epoch": 2.98, "learning_rate": 3.1474165251998977e-09, "loss": 0.0017, "step": 12681 }, { "epoch": 2.98, "learning_rate": 3.0841567372441595e-09, "loss": 0.001, "step": 12682 }, { "epoch": 2.98, "learning_rate": 3.0215390492516207e-09, "loss": 0.072, "step": 12683 }, { "epoch": 2.98, "learning_rate": 2.9595634652412887e-09, "loss": 0.0001, "step": 12684 }, { "epoch": 2.98, "learning_rate": 2.898229989196644e-09, "loss": 0.0025, "step": 12685 }, { "epoch": 2.98, "learning_rate": 2.8375386250545365e-09, "loss": 0.005, "step": 12686 }, { "epoch": 2.98, "learning_rate": 2.7774893767151812e-09, "loss": 0.035, "step": 12687 }, { "epoch": 2.98, "learning_rate": 2.7180822480343815e-09, "loss": 0.0001, "step": 12688 }, { "epoch": 2.98, "learning_rate": 2.6593172428279746e-09, "loss": 0.0063, "step": 12689 }, { "epoch": 2.98, "learning_rate": 2.6011943648718285e-09, "loss": 0.0487, "step": 12690 }, { "epoch": 2.98, "learning_rate": 2.543713617897403e-09, "loss": 0.0089, "step": 12691 }, { "epoch": 2.98, "learning_rate": 2.4868750055973e-09, "loss": 0.0023, "step": 12692 }, { "epoch": 2.98, "learning_rate": 2.430678531621933e-09, "loss": 0.0002, "step": 12693 }, { "epoch": 2.98, "learning_rate": 2.375124199580636e-09, "loss": 0.0017, "step": 12694 }, { "epoch": 2.98, "learning_rate": 2.3202120130427775e-09, "loss": 0.0038, "step": 12695 }, { "epoch": 2.98, "learning_rate": 2.2659419755344248e-09, "loss": 0.0015, "step": 12696 }, { "epoch": 2.98, "learning_rate": 2.2123140905416784e-09, "loss": 0.0228, "step": 12697 }, { "epoch": 2.98, "learning_rate": 2.1593283615095607e-09, "loss": 0.0012, "step": 12698 }, { "epoch": 2.98, "learning_rate": 2.1069847918397946e-09, "loss": 0.0059, "step": 12699 }, { "epoch": 2.98, "learning_rate": 2.055283384896356e-09, "loss": 0.0568, "step": 12700 }, { "epoch": 2.98, "learning_rate": 2.004224143998812e-09, "loss": 0.0055, "step": 12701 }, { "epoch": 2.98, "learning_rate": 1.9538070724256508e-09, "loss": 0.0004, "step": 12702 }, { "epoch": 2.98, "learning_rate": 1.9040321734176138e-09, "loss": 0.0088, "step": 12703 }, { "epoch": 2.98, "learning_rate": 1.8548994501699224e-09, "loss": 0.0044, "step": 12704 }, { "epoch": 2.98, "learning_rate": 1.8064089058389412e-09, "loss": 0.0002, "step": 12705 }, { "epoch": 2.98, "learning_rate": 1.7585605435399555e-09, "loss": 0.0006, "step": 12706 }, { "epoch": 2.98, "learning_rate": 1.7113543663449529e-09, "loss": 0.0439, "step": 12707 }, { "epoch": 2.98, "learning_rate": 1.664790377285952e-09, "loss": 0.0023, "step": 12708 }, { "epoch": 2.98, "learning_rate": 1.6188685793550042e-09, "loss": 0.0131, "step": 12709 }, { "epoch": 2.98, "learning_rate": 1.5735889755008616e-09, "loss": 0.002, "step": 12710 }, { "epoch": 2.98, "learning_rate": 1.5289515686323087e-09, "loss": 0.0016, "step": 12711 }, { "epoch": 2.98, "learning_rate": 1.4849563616159413e-09, "loss": 0.0089, "step": 12712 }, { "epoch": 2.98, "learning_rate": 1.441603357277277e-09, "loss": 0.0005, "step": 12713 }, { "epoch": 2.98, "learning_rate": 1.3988925584007552e-09, "loss": 0.0298, "step": 12714 }, { "epoch": 2.98, "learning_rate": 1.356823967730847e-09, "loss": 0.0101, "step": 12715 }, { "epoch": 2.98, "learning_rate": 1.3153975879687253e-09, "loss": 0.0018, "step": 12716 }, { "epoch": 2.98, "learning_rate": 1.2746134217755946e-09, "loss": 0.0032, "step": 12717 }, { "epoch": 2.98, "learning_rate": 1.2344714717715811e-09, "loss": 0.0096, "step": 12718 }, { "epoch": 2.99, "learning_rate": 1.1949717405335126e-09, "loss": 0.0022, "step": 12719 }, { "epoch": 2.99, "learning_rate": 1.1561142305982487e-09, "loss": 0.001, "step": 12720 }, { "epoch": 2.99, "learning_rate": 1.1178989444637911e-09, "loss": 0.0177, "step": 12721 }, { "epoch": 2.99, "learning_rate": 1.0803258845826226e-09, "loss": 0.0018, "step": 12722 }, { "epoch": 2.99, "learning_rate": 1.043395053369478e-09, "loss": 0.0, "step": 12723 }, { "epoch": 2.99, "learning_rate": 1.007106453195794e-09, "loss": 0.0012, "step": 12724 }, { "epoch": 2.99, "learning_rate": 9.714600863919288e-10, "loss": 0.0107, "step": 12725 }, { "epoch": 2.99, "learning_rate": 9.364559552482722e-10, "loss": 0.0058, "step": 12726 }, { "epoch": 2.99, "learning_rate": 9.020940620130258e-10, "loss": 0.0037, "step": 12727 }, { "epoch": 2.99, "learning_rate": 8.68374408892203e-10, "loss": 0.0094, "step": 12728 }, { "epoch": 2.99, "learning_rate": 8.352969980529591e-10, "loss": 0.0087, "step": 12729 }, { "epoch": 2.99, "learning_rate": 8.028618316191506e-10, "loss": 0.0006, "step": 12730 }, { "epoch": 2.99, "learning_rate": 7.710689116746661e-10, "loss": 0.0012, "step": 12731 }, { "epoch": 2.99, "learning_rate": 7.399182402600958e-10, "loss": 0.0151, "step": 12732 }, { "epoch": 2.99, "learning_rate": 7.094098193782817e-10, "loss": 0.0106, "step": 12733 }, { "epoch": 2.99, "learning_rate": 6.795436509876574e-10, "loss": 0.0006, "step": 12734 }, { "epoch": 2.99, "learning_rate": 6.50319737005578e-10, "loss": 0.0052, "step": 12735 }, { "epoch": 2.99, "learning_rate": 6.217380793116512e-10, "loss": 0.0107, "step": 12736 }, { "epoch": 2.99, "learning_rate": 5.937986797388551e-10, "loss": 0.0001, "step": 12737 }, { "epoch": 2.99, "learning_rate": 5.665015400835306e-10, "loss": 0.004, "step": 12738 }, { "epoch": 2.99, "learning_rate": 5.398466620987197e-10, "loss": 0.0004, "step": 12739 }, { "epoch": 2.99, "learning_rate": 5.138340474952763e-10, "loss": 0.0399, "step": 12740 }, { "epoch": 2.99, "learning_rate": 4.884636979451962e-10, "loss": 0.0042, "step": 12741 }, { "epoch": 2.99, "learning_rate": 4.637356150782868e-10, "loss": 0.0049, "step": 12742 }, { "epoch": 2.99, "learning_rate": 4.39649800482167e-10, "loss": 0.0073, "step": 12743 }, { "epoch": 2.99, "learning_rate": 4.1620625570448767e-10, "loss": 0.0048, "step": 12744 }, { "epoch": 2.99, "learning_rate": 3.9340498224960113e-10, "loss": 0.0007, "step": 12745 }, { "epoch": 2.99, "learning_rate": 3.712459815830016e-10, "loss": 0.0003, "step": 12746 }, { "epoch": 2.99, "learning_rate": 3.4972925512799517e-10, "loss": 0.0002, "step": 12747 }, { "epoch": 2.99, "learning_rate": 3.288548042668094e-10, "loss": 0.0005, "step": 12748 }, { "epoch": 2.99, "learning_rate": 3.0862263033948345e-10, "loss": 0.0473, "step": 12749 }, { "epoch": 2.99, "learning_rate": 2.890327346460886e-10, "loss": 0.0019, "step": 12750 }, { "epoch": 2.99, "learning_rate": 2.7008511844450746e-10, "loss": 0.0392, "step": 12751 }, { "epoch": 2.99, "learning_rate": 2.517797829515445e-10, "loss": 0.0005, "step": 12752 }, { "epoch": 2.99, "learning_rate": 2.341167293440361e-10, "loss": 0.0124, "step": 12753 }, { "epoch": 2.99, "learning_rate": 2.1709595875551993e-10, "loss": 0.0076, "step": 12754 }, { "epoch": 2.99, "learning_rate": 2.0071747227956572e-10, "loss": 0.0005, "step": 12755 }, { "epoch": 2.99, "learning_rate": 1.849812709686649e-10, "loss": 0.0009, "step": 12756 }, { "epoch": 2.99, "learning_rate": 1.698873558320102e-10, "loss": 0.0016, "step": 12757 }, { "epoch": 2.99, "learning_rate": 1.5543572784104677e-10, "loss": 0.0007, "step": 12758 }, { "epoch": 2.99, "learning_rate": 1.416263879228108e-10, "loss": 0.0022, "step": 12759 }, { "epoch": 2.99, "learning_rate": 1.2845933696437053e-10, "loss": 0.002, "step": 12760 }, { "epoch": 2.99, "learning_rate": 1.1593457581060564e-10, "loss": 0.0005, "step": 12761 }, { "epoch": 3.0, "learning_rate": 1.0405210526753806e-10, "loss": 0.0034, "step": 12762 }, { "epoch": 3.0, "learning_rate": 9.281192609789102e-11, "loss": 0.0001, "step": 12763 }, { "epoch": 3.0, "learning_rate": 8.221403902441971e-11, "loss": 0.0029, "step": 12764 }, { "epoch": 3.0, "learning_rate": 7.225844472547039e-11, "loss": 0.0002, "step": 12765 }, { "epoch": 3.0, "learning_rate": 6.294514384275197e-11, "loss": 0.0042, "step": 12766 }, { "epoch": 3.0, "learning_rate": 5.4274136973564426e-11, "loss": 0.0037, "step": 12767 }, { "epoch": 3.0, "learning_rate": 4.6245424674129513e-11, "loss": 0.0514, "step": 12768 }, { "epoch": 3.0, "learning_rate": 3.8859007461811147e-11, "loss": 0.0073, "step": 12769 }, { "epoch": 3.0, "learning_rate": 3.211488580956434e-11, "loss": 0.0025, "step": 12770 }, { "epoch": 3.0, "learning_rate": 2.60130601514863e-11, "loss": 0.008, "step": 12771 }, { "epoch": 3.0, "learning_rate": 2.0553530878375527e-11, "loss": 0.0073, "step": 12772 }, { "epoch": 3.0, "learning_rate": 1.573629834217272e-11, "loss": 0.0236, "step": 12773 }, { "epoch": 3.0, "learning_rate": 1.1561362851519875e-11, "loss": 0.0264, "step": 12774 }, { "epoch": 3.0, "learning_rate": 8.02872467509097e-12, "loss": 0.0005, "step": 12775 }, { "epoch": 3.0, "learning_rate": 5.138384039371503e-12, "loss": 0.0055, "step": 12776 }, { "epoch": 3.0, "learning_rate": 2.8903411308789376e-12, "loss": 0.0191, "step": 12777 }, { "epoch": 3.0, "learning_rate": 1.2845960928320466e-12, "loss": 0.0175, "step": 12778 }, { "epoch": 3.0, "learning_rate": 3.2114902848157103e-13, "loss": 0.0034, "step": 12779 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 0.0012, "step": 12780 }, { "epoch": 3.0, "step": 12780, "total_flos": 1.4235469648756736e+18, "train_loss": 0.050876580332932436, "train_runtime": 29070.2768, "train_samples_per_second": 14.071, "train_steps_per_second": 0.44 } ], "logging_steps": 1.0, "max_steps": 12780, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 4000, "total_flos": 1.4235469648756736e+18, "trial_name": null, "trial_params": null }