diff --git "a/checkpoint-1152/trainer_state.json" "b/checkpoint-1152/trainer_state.json" deleted file mode 100644--- "a/checkpoint-1152/trainer_state.json" +++ /dev/null @@ -1,7000 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.881203694988638, - "global_step": 1152, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 1.5625e-07, - "loss": 3.4895, - "step": 1 - }, - { - "epoch": 0.0, - "learning_rate": 3.125e-07, - "loss": 3.5428, - "step": 2 - }, - { - "epoch": 0.0, - "learning_rate": 4.6875000000000006e-07, - "loss": 3.5726, - "step": 3 - }, - { - "epoch": 0.0, - "learning_rate": 6.25e-07, - "loss": 3.6189, - "step": 4 - }, - { - "epoch": 0.0, - "learning_rate": 7.8125e-07, - "loss": 3.511, - "step": 5 - }, - { - "epoch": 0.0, - "learning_rate": 9.375000000000001e-07, - "loss": 3.4609, - "step": 6 - }, - { - "epoch": 0.01, - "learning_rate": 1.0937500000000001e-06, - "loss": 3.4712, - "step": 7 - }, - { - "epoch": 0.01, - "learning_rate": 1.25e-06, - "loss": 3.5399, - "step": 8 - }, - { - "epoch": 0.01, - "learning_rate": 1.40625e-06, - "loss": 3.4956, - "step": 9 - }, - { - "epoch": 0.01, - "learning_rate": 1.5625e-06, - "loss": 3.5031, - "step": 10 - }, - { - "epoch": 0.01, - "learning_rate": 1.71875e-06, - "loss": 3.5565, - "step": 11 - }, - { - "epoch": 0.01, - "learning_rate": 1.8750000000000003e-06, - "loss": 3.383, - "step": 12 - }, - { - "epoch": 0.01, - "learning_rate": 2.0312500000000002e-06, - "loss": 3.3852, - "step": 13 - }, - { - "epoch": 0.01, - "learning_rate": 2.1875000000000002e-06, - "loss": 3.3688, - "step": 14 - }, - { - "epoch": 0.01, - "learning_rate": 2.3437500000000002e-06, - "loss": 3.3342, - "step": 15 - }, - { - "epoch": 0.01, - "learning_rate": 2.5e-06, - "loss": 3.2689, - "step": 16 - }, - { - "epoch": 0.01, - "learning_rate": 2.65625e-06, - "loss": 3.2455, - "step": 17 - }, - { - "epoch": 0.01, - "learning_rate": 2.8125e-06, - "loss": 3.2564, - "step": 18 - }, - { - "epoch": 0.01, - "learning_rate": 2.96875e-06, - "loss": 3.0662, - "step": 19 - }, - { - "epoch": 0.02, - "learning_rate": 3.125e-06, - "loss": 3.0245, - "step": 20 - }, - { - "epoch": 0.02, - "learning_rate": 3.28125e-06, - "loss": 2.9888, - "step": 21 - }, - { - "epoch": 0.02, - "learning_rate": 3.4375e-06, - "loss": 2.874, - "step": 22 - }, - { - "epoch": 0.02, - "learning_rate": 3.59375e-06, - "loss": 2.7424, - "step": 23 - }, - { - "epoch": 0.02, - "learning_rate": 3.7500000000000005e-06, - "loss": 2.7272, - "step": 24 - }, - { - "epoch": 0.02, - "learning_rate": 3.90625e-06, - "loss": 2.5789, - "step": 25 - }, - { - "epoch": 0.02, - "learning_rate": 4.0625000000000005e-06, - "loss": 2.4029, - "step": 26 - }, - { - "epoch": 0.02, - "learning_rate": 4.21875e-06, - "loss": 2.237, - "step": 27 - }, - { - "epoch": 0.02, - "learning_rate": 4.3750000000000005e-06, - "loss": 2.1258, - "step": 28 - }, - { - "epoch": 0.02, - "learning_rate": 4.53125e-06, - "loss": 1.9172, - "step": 29 - }, - { - "epoch": 0.02, - "learning_rate": 4.6875000000000004e-06, - "loss": 1.8099, - "step": 30 - }, - { - "epoch": 0.02, - "learning_rate": 4.84375e-06, - "loss": 1.7006, - "step": 31 - }, - { - "epoch": 0.02, - "learning_rate": 5e-06, - "loss": 1.5401, - "step": 32 - }, - { - "epoch": 0.03, - "learning_rate": 5.156250000000001e-06, - "loss": 1.4624, - "step": 33 - }, - { - "epoch": 0.03, - "learning_rate": 5.3125e-06, - "loss": 1.3664, - "step": 34 - }, - { - "epoch": 0.03, - "learning_rate": 5.468750000000001e-06, - "loss": 1.2766, - "step": 35 - }, - { - "epoch": 0.03, - "learning_rate": 5.625e-06, - "loss": 1.1971, - "step": 36 - }, - { - "epoch": 0.03, - "learning_rate": 5.781250000000001e-06, - "loss": 1.1606, - "step": 37 - }, - { - "epoch": 0.03, - "learning_rate": 5.9375e-06, - "loss": 1.0934, - "step": 38 - }, - { - "epoch": 0.03, - "learning_rate": 6.093750000000001e-06, - "loss": 1.0819, - "step": 39 - }, - { - "epoch": 0.03, - "learning_rate": 6.25e-06, - "loss": 1.0678, - "step": 40 - }, - { - "epoch": 0.03, - "learning_rate": 6.406250000000001e-06, - "loss": 1.0517, - "step": 41 - }, - { - "epoch": 0.03, - "learning_rate": 6.5625e-06, - "loss": 1.0182, - "step": 42 - }, - { - "epoch": 0.03, - "learning_rate": 6.718750000000001e-06, - "loss": 0.994, - "step": 43 - }, - { - "epoch": 0.03, - "learning_rate": 6.875e-06, - "loss": 0.9798, - "step": 44 - }, - { - "epoch": 0.03, - "learning_rate": 7.031250000000001e-06, - "loss": 0.9874, - "step": 45 - }, - { - "epoch": 0.04, - "learning_rate": 7.1875e-06, - "loss": 0.9626, - "step": 46 - }, - { - "epoch": 0.04, - "learning_rate": 7.343750000000001e-06, - "loss": 0.9631, - "step": 47 - }, - { - "epoch": 0.04, - "learning_rate": 7.500000000000001e-06, - "loss": 0.9222, - "step": 48 - }, - { - "epoch": 0.04, - "learning_rate": 7.656250000000001e-06, - "loss": 0.9402, - "step": 49 - }, - { - "epoch": 0.04, - "learning_rate": 7.8125e-06, - "loss": 0.9179, - "step": 50 - }, - { - "epoch": 0.04, - "learning_rate": 7.96875e-06, - "loss": 0.9019, - "step": 51 - }, - { - "epoch": 0.04, - "learning_rate": 8.125000000000001e-06, - "loss": 0.9063, - "step": 52 - }, - { - "epoch": 0.04, - "learning_rate": 8.281250000000001e-06, - "loss": 0.9219, - "step": 53 - }, - { - "epoch": 0.04, - "learning_rate": 8.4375e-06, - "loss": 0.9168, - "step": 54 - }, - { - "epoch": 0.04, - "learning_rate": 8.59375e-06, - "loss": 0.8971, - "step": 55 - }, - { - "epoch": 0.04, - "learning_rate": 8.750000000000001e-06, - "loss": 0.8797, - "step": 56 - }, - { - "epoch": 0.04, - "learning_rate": 8.906250000000001e-06, - "loss": 0.9042, - "step": 57 - }, - { - "epoch": 0.04, - "learning_rate": 9.0625e-06, - "loss": 0.8862, - "step": 58 - }, - { - "epoch": 0.05, - "learning_rate": 9.21875e-06, - "loss": 0.8587, - "step": 59 - }, - { - "epoch": 0.05, - "learning_rate": 9.375000000000001e-06, - "loss": 0.8852, - "step": 60 - }, - { - "epoch": 0.05, - "learning_rate": 9.531250000000001e-06, - "loss": 0.8456, - "step": 61 - }, - { - "epoch": 0.05, - "learning_rate": 9.6875e-06, - "loss": 0.8573, - "step": 62 - }, - { - "epoch": 0.05, - "learning_rate": 9.84375e-06, - "loss": 0.8602, - "step": 63 - }, - { - "epoch": 0.05, - "learning_rate": 1e-05, - "loss": 0.8527, - "step": 64 - }, - { - "epoch": 0.05, - "learning_rate": 9.999984030281327e-06, - "loss": 0.8608, - "step": 65 - }, - { - "epoch": 0.05, - "learning_rate": 9.999936121227315e-06, - "loss": 0.8444, - "step": 66 - }, - { - "epoch": 0.05, - "learning_rate": 9.999856273144007e-06, - "loss": 0.8354, - "step": 67 - }, - { - "epoch": 0.05, - "learning_rate": 9.99974448654146e-06, - "loss": 0.8175, - "step": 68 - }, - { - "epoch": 0.05, - "learning_rate": 9.999600762133756e-06, - "loss": 0.8482, - "step": 69 - }, - { - "epoch": 0.05, - "learning_rate": 9.99942510083899e-06, - "loss": 0.8259, - "step": 70 - }, - { - "epoch": 0.05, - "learning_rate": 9.999217503779266e-06, - "loss": 0.8521, - "step": 71 - }, - { - "epoch": 0.06, - "learning_rate": 9.998977972280691e-06, - "loss": 0.8328, - "step": 72 - }, - { - "epoch": 0.06, - "learning_rate": 9.998706507873365e-06, - "loss": 0.8309, - "step": 73 - }, - { - "epoch": 0.06, - "learning_rate": 9.998403112291374e-06, - "loss": 0.8383, - "step": 74 - }, - { - "epoch": 0.06, - "learning_rate": 9.998067787472772e-06, - "loss": 0.8284, - "step": 75 - }, - { - "epoch": 0.06, - "learning_rate": 9.997700535559575e-06, - "loss": 0.8049, - "step": 76 - }, - { - "epoch": 0.06, - "learning_rate": 9.997301358897752e-06, - "loss": 0.8144, - "step": 77 - }, - { - "epoch": 0.06, - "learning_rate": 9.996870260037196e-06, - "loss": 0.819, - "step": 78 - }, - { - "epoch": 0.06, - "learning_rate": 9.996407241731718e-06, - "loss": 0.8077, - "step": 79 - }, - { - "epoch": 0.06, - "learning_rate": 9.995912306939026e-06, - "loss": 0.813, - "step": 80 - }, - { - "epoch": 0.06, - "learning_rate": 9.995385458820708e-06, - "loss": 0.8016, - "step": 81 - }, - { - "epoch": 0.06, - "learning_rate": 9.994826700742211e-06, - "loss": 0.7992, - "step": 82 - }, - { - "epoch": 0.06, - "learning_rate": 9.994236036272819e-06, - "loss": 0.8022, - "step": 83 - }, - { - "epoch": 0.06, - "learning_rate": 9.993613469185631e-06, - "loss": 0.7922, - "step": 84 - }, - { - "epoch": 0.07, - "learning_rate": 9.992959003457534e-06, - "loss": 0.8035, - "step": 85 - }, - { - "epoch": 0.07, - "learning_rate": 9.992272643269181e-06, - "loss": 0.8092, - "step": 86 - }, - { - "epoch": 0.07, - "learning_rate": 9.991554393004965e-06, - "loss": 0.8076, - "step": 87 - }, - { - "epoch": 0.07, - "learning_rate": 9.990804257252987e-06, - "loss": 0.784, - "step": 88 - }, - { - "epoch": 0.07, - "learning_rate": 9.99002224080503e-06, - "loss": 0.7834, - "step": 89 - }, - { - "epoch": 0.07, - "learning_rate": 9.989208348656528e-06, - "loss": 0.8092, - "step": 90 - }, - { - "epoch": 0.07, - "learning_rate": 9.988362586006531e-06, - "loss": 0.7941, - "step": 91 - }, - { - "epoch": 0.07, - "learning_rate": 9.987484958257675e-06, - "loss": 0.7952, - "step": 92 - }, - { - "epoch": 0.07, - "learning_rate": 9.986575471016152e-06, - "loss": 0.8039, - "step": 93 - }, - { - "epoch": 0.07, - "learning_rate": 9.985634130091656e-06, - "loss": 0.8036, - "step": 94 - }, - { - "epoch": 0.07, - "learning_rate": 9.984660941497375e-06, - "loss": 0.7878, - "step": 95 - }, - { - "epoch": 0.07, - "learning_rate": 9.983655911449922e-06, - "loss": 0.7923, - "step": 96 - }, - { - "epoch": 0.07, - "learning_rate": 9.982619046369321e-06, - "loss": 0.7793, - "step": 97 - }, - { - "epoch": 0.07, - "learning_rate": 9.981550352878948e-06, - "loss": 0.7868, - "step": 98 - }, - { - "epoch": 0.08, - "learning_rate": 9.980449837805495e-06, - "loss": 0.7756, - "step": 99 - }, - { - "epoch": 0.08, - "learning_rate": 9.979317508178929e-06, - "loss": 0.7951, - "step": 100 - }, - { - "epoch": 0.08, - "learning_rate": 9.978153371232444e-06, - "loss": 0.7923, - "step": 101 - }, - { - "epoch": 0.08, - "learning_rate": 9.976957434402416e-06, - "loss": 0.7926, - "step": 102 - }, - { - "epoch": 0.08, - "learning_rate": 9.975729705328356e-06, - "loss": 0.7874, - "step": 103 - }, - { - "epoch": 0.08, - "learning_rate": 9.974470191852858e-06, - "loss": 0.7956, - "step": 104 - }, - { - "epoch": 0.08, - "learning_rate": 9.973178902021555e-06, - "loss": 0.8077, - "step": 105 - }, - { - "epoch": 0.08, - "learning_rate": 9.971855844083055e-06, - "loss": 0.8009, - "step": 106 - }, - { - "epoch": 0.08, - "learning_rate": 9.97050102648891e-06, - "loss": 0.7801, - "step": 107 - }, - { - "epoch": 0.08, - "learning_rate": 9.96911445789354e-06, - "loss": 0.7759, - "step": 108 - }, - { - "epoch": 0.08, - "learning_rate": 9.967696147154187e-06, - "loss": 0.7591, - "step": 109 - }, - { - "epoch": 0.08, - "learning_rate": 9.966246103330863e-06, - "loss": 0.766, - "step": 110 - }, - { - "epoch": 0.08, - "learning_rate": 9.964764335686284e-06, - "loss": 0.8039, - "step": 111 - }, - { - "epoch": 0.09, - "learning_rate": 9.963250853685813e-06, - "loss": 0.7847, - "step": 112 - }, - { - "epoch": 0.09, - "learning_rate": 9.961705666997406e-06, - "loss": 0.7568, - "step": 113 - }, - { - "epoch": 0.09, - "learning_rate": 9.96012878549154e-06, - "loss": 0.7781, - "step": 114 - }, - { - "epoch": 0.09, - "learning_rate": 9.958520219241156e-06, - "loss": 0.7727, - "step": 115 - }, - { - "epoch": 0.09, - "learning_rate": 9.956879978521596e-06, - "loss": 0.7913, - "step": 116 - }, - { - "epoch": 0.09, - "learning_rate": 9.955208073810532e-06, - "loss": 0.7646, - "step": 117 - }, - { - "epoch": 0.09, - "learning_rate": 9.953504515787902e-06, - "loss": 0.7628, - "step": 118 - }, - { - "epoch": 0.09, - "learning_rate": 9.951769315335843e-06, - "loss": 0.7798, - "step": 119 - }, - { - "epoch": 0.09, - "learning_rate": 9.950002483538626e-06, - "loss": 0.7794, - "step": 120 - }, - { - "epoch": 0.09, - "learning_rate": 9.948204031682566e-06, - "loss": 0.7709, - "step": 121 - }, - { - "epoch": 0.09, - "learning_rate": 9.946373971255975e-06, - "loss": 0.7729, - "step": 122 - }, - { - "epoch": 0.09, - "learning_rate": 9.944512313949071e-06, - "loss": 0.7677, - "step": 123 - }, - { - "epoch": 0.09, - "learning_rate": 9.942619071653914e-06, - "loss": 0.7687, - "step": 124 - }, - { - "epoch": 0.1, - "learning_rate": 9.940694256464322e-06, - "loss": 0.794, - "step": 125 - }, - { - "epoch": 0.1, - "learning_rate": 9.938737880675796e-06, - "loss": 0.7773, - "step": 126 - }, - { - "epoch": 0.1, - "learning_rate": 9.936749956785446e-06, - "loss": 0.7839, - "step": 127 - }, - { - "epoch": 0.1, - "learning_rate": 9.934730497491907e-06, - "loss": 0.774, - "step": 128 - }, - { - "epoch": 0.1, - "eval_loss": 1.3403148651123047, - "eval_runtime": 125.9471, - "eval_samples_per_second": 95.032, - "eval_steps_per_second": 23.764, - "step": 128 - }, - { - "epoch": 0.1, - "learning_rate": 9.932679515695254e-06, - "loss": 0.7666, - "step": 129 - }, - { - "epoch": 0.1, - "learning_rate": 9.930597024496933e-06, - "loss": 0.7804, - "step": 130 - }, - { - "epoch": 0.1, - "learning_rate": 9.92848303719966e-06, - "loss": 0.7914, - "step": 131 - }, - { - "epoch": 0.1, - "learning_rate": 9.926337567307346e-06, - "loss": 0.7687, - "step": 132 - }, - { - "epoch": 0.1, - "learning_rate": 9.924160628525016e-06, - "loss": 0.738, - "step": 133 - }, - { - "epoch": 0.1, - "learning_rate": 9.921952234758708e-06, - "loss": 0.7334, - "step": 134 - }, - { - "epoch": 0.1, - "learning_rate": 9.919712400115393e-06, - "loss": 0.7717, - "step": 135 - }, - { - "epoch": 0.1, - "learning_rate": 9.917441138902883e-06, - "loss": 0.7834, - "step": 136 - }, - { - "epoch": 0.1, - "learning_rate": 9.915138465629737e-06, - "loss": 0.7625, - "step": 137 - }, - { - "epoch": 0.11, - "learning_rate": 9.912804395005176e-06, - "loss": 0.7771, - "step": 138 - }, - { - "epoch": 0.11, - "learning_rate": 9.910438941938977e-06, - "loss": 0.7398, - "step": 139 - }, - { - "epoch": 0.11, - "learning_rate": 9.908042121541392e-06, - "loss": 0.7645, - "step": 140 - }, - { - "epoch": 0.11, - "learning_rate": 9.905613949123036e-06, - "loss": 0.7697, - "step": 141 - }, - { - "epoch": 0.11, - "learning_rate": 9.903154440194804e-06, - "loss": 0.7689, - "step": 142 - }, - { - "epoch": 0.11, - "learning_rate": 9.90066361046776e-06, - "loss": 0.749, - "step": 143 - }, - { - "epoch": 0.11, - "learning_rate": 9.898141475853047e-06, - "loss": 0.7712, - "step": 144 - }, - { - "epoch": 0.11, - "learning_rate": 9.895588052461773e-06, - "loss": 0.7676, - "step": 145 - }, - { - "epoch": 0.11, - "learning_rate": 9.893003356604923e-06, - "loss": 0.7765, - "step": 146 - }, - { - "epoch": 0.11, - "learning_rate": 9.890387404793243e-06, - "loss": 0.7854, - "step": 147 - }, - { - "epoch": 0.11, - "learning_rate": 9.887740213737136e-06, - "loss": 0.7951, - "step": 148 - }, - { - "epoch": 0.11, - "learning_rate": 9.885061800346563e-06, - "loss": 0.7646, - "step": 149 - }, - { - "epoch": 0.11, - "learning_rate": 9.882352181730925e-06, - "loss": 0.7664, - "step": 150 - }, - { - "epoch": 0.12, - "learning_rate": 9.879611375198963e-06, - "loss": 0.7477, - "step": 151 - }, - { - "epoch": 0.12, - "learning_rate": 9.87683939825864e-06, - "loss": 0.7686, - "step": 152 - }, - { - "epoch": 0.12, - "learning_rate": 9.874036268617034e-06, - "loss": 0.7611, - "step": 153 - }, - { - "epoch": 0.12, - "learning_rate": 9.871202004180221e-06, - "loss": 0.7664, - "step": 154 - }, - { - "epoch": 0.12, - "learning_rate": 9.86833662305316e-06, - "loss": 0.7451, - "step": 155 - }, - { - "epoch": 0.12, - "learning_rate": 9.865440143539589e-06, - "loss": 0.774, - "step": 156 - }, - { - "epoch": 0.12, - "learning_rate": 9.86251258414189e-06, - "loss": 0.7695, - "step": 157 - }, - { - "epoch": 0.12, - "learning_rate": 9.859553963560982e-06, - "loss": 0.7446, - "step": 158 - }, - { - "epoch": 0.12, - "learning_rate": 9.856564300696201e-06, - "loss": 0.7797, - "step": 159 - }, - { - "epoch": 0.12, - "learning_rate": 9.85354361464518e-06, - "loss": 0.7668, - "step": 160 - }, - { - "epoch": 0.12, - "learning_rate": 9.850491924703716e-06, - "loss": 0.7617, - "step": 161 - }, - { - "epoch": 0.12, - "learning_rate": 9.847409250365666e-06, - "loss": 0.7408, - "step": 162 - }, - { - "epoch": 0.12, - "learning_rate": 9.844295611322804e-06, - "loss": 0.7612, - "step": 163 - }, - { - "epoch": 0.13, - "learning_rate": 9.841151027464707e-06, - "loss": 0.7597, - "step": 164 - }, - { - "epoch": 0.13, - "learning_rate": 9.837975518878622e-06, - "loss": 0.7566, - "step": 165 - }, - { - "epoch": 0.13, - "learning_rate": 9.834769105849341e-06, - "loss": 0.7747, - "step": 166 - }, - { - "epoch": 0.13, - "learning_rate": 9.83153180885907e-06, - "loss": 0.7662, - "step": 167 - }, - { - "epoch": 0.13, - "learning_rate": 9.828263648587298e-06, - "loss": 0.7692, - "step": 168 - }, - { - "epoch": 0.13, - "learning_rate": 9.824964645910664e-06, - "loss": 0.781, - "step": 169 - }, - { - "epoch": 0.13, - "learning_rate": 9.821634821902825e-06, - "loss": 0.7557, - "step": 170 - }, - { - "epoch": 0.13, - "learning_rate": 9.818274197834326e-06, - "loss": 0.7287, - "step": 171 - }, - { - "epoch": 0.13, - "learning_rate": 9.814882795172451e-06, - "loss": 0.7196, - "step": 172 - }, - { - "epoch": 0.13, - "learning_rate": 9.811460635581102e-06, - "loss": 0.7687, - "step": 173 - }, - { - "epoch": 0.13, - "learning_rate": 9.808007740920647e-06, - "loss": 0.7525, - "step": 174 - }, - { - "epoch": 0.13, - "learning_rate": 9.804524133247788e-06, - "loss": 0.7662, - "step": 175 - }, - { - "epoch": 0.13, - "learning_rate": 9.80100983481542e-06, - "loss": 0.7616, - "step": 176 - }, - { - "epoch": 0.14, - "learning_rate": 9.797464868072489e-06, - "loss": 0.7547, - "step": 177 - }, - { - "epoch": 0.14, - "learning_rate": 9.793889255663838e-06, - "loss": 0.7661, - "step": 178 - }, - { - "epoch": 0.14, - "learning_rate": 9.79028302043008e-06, - "loss": 0.7624, - "step": 179 - }, - { - "epoch": 0.14, - "learning_rate": 9.786646185407438e-06, - "loss": 0.7511, - "step": 180 - }, - { - "epoch": 0.14, - "learning_rate": 9.782978773827607e-06, - "loss": 0.7449, - "step": 181 - }, - { - "epoch": 0.14, - "learning_rate": 9.7792808091176e-06, - "loss": 0.744, - "step": 182 - }, - { - "epoch": 0.14, - "learning_rate": 9.775552314899596e-06, - "loss": 0.7497, - "step": 183 - }, - { - "epoch": 0.14, - "learning_rate": 9.7717933149908e-06, - "loss": 0.7512, - "step": 184 - }, - { - "epoch": 0.14, - "learning_rate": 9.768003833403278e-06, - "loss": 0.7412, - "step": 185 - }, - { - "epoch": 0.14, - "learning_rate": 9.764183894343812e-06, - "loss": 0.7194, - "step": 186 - }, - { - "epoch": 0.14, - "learning_rate": 9.760333522213746e-06, - "loss": 0.7585, - "step": 187 - }, - { - "epoch": 0.14, - "learning_rate": 9.75645274160882e-06, - "loss": 0.7492, - "step": 188 - }, - { - "epoch": 0.14, - "learning_rate": 9.752541577319026e-06, - "loss": 0.7214, - "step": 189 - }, - { - "epoch": 0.15, - "learning_rate": 9.74860005432844e-06, - "loss": 0.7406, - "step": 190 - }, - { - "epoch": 0.15, - "learning_rate": 9.744628197815068e-06, - "loss": 0.7385, - "step": 191 - }, - { - "epoch": 0.15, - "learning_rate": 9.740626033150683e-06, - "loss": 0.7513, - "step": 192 - }, - { - "epoch": 0.15, - "learning_rate": 9.736593585900662e-06, - "loss": 0.736, - "step": 193 - }, - { - "epoch": 0.15, - "learning_rate": 9.732530881823825e-06, - "loss": 0.7295, - "step": 194 - }, - { - "epoch": 0.15, - "learning_rate": 9.728437946872266e-06, - "loss": 0.7546, - "step": 195 - }, - { - "epoch": 0.15, - "learning_rate": 9.724314807191197e-06, - "loss": 0.7377, - "step": 196 - }, - { - "epoch": 0.15, - "learning_rate": 9.720161489118766e-06, - "loss": 0.7383, - "step": 197 - }, - { - "epoch": 0.15, - "learning_rate": 9.715978019185904e-06, - "loss": 0.735, - "step": 198 - }, - { - "epoch": 0.15, - "learning_rate": 9.711764424116146e-06, - "loss": 0.7486, - "step": 199 - }, - { - "epoch": 0.15, - "learning_rate": 9.707520730825461e-06, - "loss": 0.7599, - "step": 200 - }, - { - "epoch": 0.15, - "learning_rate": 9.703246966422088e-06, - "loss": 0.7608, - "step": 201 - }, - { - "epoch": 0.15, - "learning_rate": 9.698943158206351e-06, - "loss": 0.7457, - "step": 202 - }, - { - "epoch": 0.16, - "learning_rate": 9.694609333670493e-06, - "loss": 0.7602, - "step": 203 - }, - { - "epoch": 0.16, - "learning_rate": 9.690245520498496e-06, - "loss": 0.7467, - "step": 204 - }, - { - "epoch": 0.16, - "learning_rate": 9.685851746565908e-06, - "loss": 0.7517, - "step": 205 - }, - { - "epoch": 0.16, - "learning_rate": 9.681428039939664e-06, - "loss": 0.7466, - "step": 206 - }, - { - "epoch": 0.16, - "learning_rate": 9.6769744288779e-06, - "loss": 0.7464, - "step": 207 - }, - { - "epoch": 0.16, - "learning_rate": 9.67249094182979e-06, - "loss": 0.7468, - "step": 208 - }, - { - "epoch": 0.16, - "learning_rate": 9.667977607435338e-06, - "loss": 0.7412, - "step": 209 - }, - { - "epoch": 0.16, - "learning_rate": 9.663434454525218e-06, - "loss": 0.7558, - "step": 210 - }, - { - "epoch": 0.16, - "learning_rate": 9.65886151212058e-06, - "loss": 0.7522, - "step": 211 - }, - { - "epoch": 0.16, - "learning_rate": 9.654258809432865e-06, - "loss": 0.7825, - "step": 212 - }, - { - "epoch": 0.16, - "learning_rate": 9.649626375863622e-06, - "loss": 0.7422, - "step": 213 - }, - { - "epoch": 0.16, - "learning_rate": 9.644964241004312e-06, - "loss": 0.749, - "step": 214 - }, - { - "epoch": 0.16, - "learning_rate": 9.64027243463613e-06, - "loss": 0.7458, - "step": 215 - }, - { - "epoch": 0.17, - "learning_rate": 9.635550986729808e-06, - "loss": 0.7273, - "step": 216 - }, - { - "epoch": 0.17, - "learning_rate": 9.63079992744542e-06, - "loss": 0.7418, - "step": 217 - }, - { - "epoch": 0.17, - "learning_rate": 9.626019287132202e-06, - "loss": 0.7609, - "step": 218 - }, - { - "epoch": 0.17, - "learning_rate": 9.621209096328344e-06, - "loss": 0.7546, - "step": 219 - }, - { - "epoch": 0.17, - "learning_rate": 9.616369385760805e-06, - "loss": 0.7443, - "step": 220 - }, - { - "epoch": 0.17, - "learning_rate": 9.611500186345112e-06, - "loss": 0.7614, - "step": 221 - }, - { - "epoch": 0.17, - "learning_rate": 9.60660152918516e-06, - "loss": 0.7206, - "step": 222 - }, - { - "epoch": 0.17, - "learning_rate": 9.601673445573021e-06, - "loss": 0.7414, - "step": 223 - }, - { - "epoch": 0.17, - "learning_rate": 9.59671596698874e-06, - "loss": 0.7512, - "step": 224 - }, - { - "epoch": 0.17, - "learning_rate": 9.591729125100133e-06, - "loss": 0.7415, - "step": 225 - }, - { - "epoch": 0.17, - "learning_rate": 9.586712951762583e-06, - "loss": 0.736, - "step": 226 - }, - { - "epoch": 0.17, - "learning_rate": 9.58166747901884e-06, - "loss": 0.7469, - "step": 227 - }, - { - "epoch": 0.17, - "learning_rate": 9.576592739098816e-06, - "loss": 0.7419, - "step": 228 - }, - { - "epoch": 0.18, - "learning_rate": 9.571488764419381e-06, - "loss": 0.732, - "step": 229 - }, - { - "epoch": 0.18, - "learning_rate": 9.566355587584149e-06, - "loss": 0.7464, - "step": 230 - }, - { - "epoch": 0.18, - "learning_rate": 9.561193241383277e-06, - "loss": 0.734, - "step": 231 - }, - { - "epoch": 0.18, - "learning_rate": 9.556001758793252e-06, - "loss": 0.7277, - "step": 232 - }, - { - "epoch": 0.18, - "learning_rate": 9.550781172976679e-06, - "loss": 0.7487, - "step": 233 - }, - { - "epoch": 0.18, - "learning_rate": 9.545531517282074e-06, - "loss": 0.7384, - "step": 234 - }, - { - "epoch": 0.18, - "learning_rate": 9.540252825243646e-06, - "loss": 0.7403, - "step": 235 - }, - { - "epoch": 0.18, - "learning_rate": 9.534945130581087e-06, - "loss": 0.7224, - "step": 236 - }, - { - "epoch": 0.18, - "learning_rate": 9.529608467199351e-06, - "loss": 0.7539, - "step": 237 - }, - { - "epoch": 0.18, - "learning_rate": 9.524242869188445e-06, - "loss": 0.7423, - "step": 238 - }, - { - "epoch": 0.18, - "learning_rate": 9.518848370823205e-06, - "loss": 0.727, - "step": 239 - }, - { - "epoch": 0.18, - "learning_rate": 9.51342500656308e-06, - "loss": 0.7174, - "step": 240 - }, - { - "epoch": 0.18, - "learning_rate": 9.507972811051909e-06, - "loss": 0.7391, - "step": 241 - }, - { - "epoch": 0.19, - "learning_rate": 9.502491819117702e-06, - "loss": 0.7353, - "step": 242 - }, - { - "epoch": 0.19, - "learning_rate": 9.496982065772425e-06, - "loss": 0.728, - "step": 243 - }, - { - "epoch": 0.19, - "learning_rate": 9.491443586211756e-06, - "loss": 0.7333, - "step": 244 - }, - { - "epoch": 0.19, - "learning_rate": 9.485876415814882e-06, - "loss": 0.749, - "step": 245 - }, - { - "epoch": 0.19, - "learning_rate": 9.480280590144261e-06, - "loss": 0.7399, - "step": 246 - }, - { - "epoch": 0.19, - "learning_rate": 9.474656144945397e-06, - "loss": 0.7333, - "step": 247 - }, - { - "epoch": 0.19, - "learning_rate": 9.469003116146613e-06, - "loss": 0.7343, - "step": 248 - }, - { - "epoch": 0.19, - "learning_rate": 9.463321539858821e-06, - "loss": 0.75, - "step": 249 - }, - { - "epoch": 0.19, - "learning_rate": 9.45761145237529e-06, - "loss": 0.7316, - "step": 250 - }, - { - "epoch": 0.19, - "learning_rate": 9.451872890171419e-06, - "loss": 0.7311, - "step": 251 - }, - { - "epoch": 0.19, - "learning_rate": 9.446105889904496e-06, - "loss": 0.7395, - "step": 252 - }, - { - "epoch": 0.19, - "learning_rate": 9.440310488413469e-06, - "loss": 0.7362, - "step": 253 - }, - { - "epoch": 0.19, - "learning_rate": 9.434486722718712e-06, - "loss": 0.74, - "step": 254 - }, - { - "epoch": 0.2, - "learning_rate": 9.428634630021783e-06, - "loss": 0.7308, - "step": 255 - }, - { - "epoch": 0.2, - "learning_rate": 9.422754247705192e-06, - "loss": 0.7276, - "step": 256 - }, - { - "epoch": 0.2, - "eval_loss": 1.3016690015792847, - "eval_runtime": 124.5394, - "eval_samples_per_second": 96.106, - "eval_steps_per_second": 24.033, - "step": 256 - }, - { - "epoch": 0.2, - "learning_rate": 9.416845613332162e-06, - "loss": 0.7373, - "step": 257 - }, - { - "epoch": 0.2, - "learning_rate": 9.410908764646383e-06, - "loss": 0.7288, - "step": 258 - }, - { - "epoch": 0.2, - "learning_rate": 9.404943739571774e-06, - "loss": 0.73, - "step": 259 - }, - { - "epoch": 0.2, - "learning_rate": 9.398950576212249e-06, - "loss": 0.7479, - "step": 260 - }, - { - "epoch": 0.2, - "learning_rate": 9.392929312851455e-06, - "loss": 0.7384, - "step": 261 - }, - { - "epoch": 0.2, - "learning_rate": 9.386879987952549e-06, - "loss": 0.745, - "step": 262 - }, - { - "epoch": 0.2, - "learning_rate": 9.380802640157937e-06, - "loss": 0.7251, - "step": 263 - }, - { - "epoch": 0.2, - "learning_rate": 9.374697308289034e-06, - "loss": 0.7368, - "step": 264 - }, - { - "epoch": 0.2, - "learning_rate": 9.36856403134601e-06, - "loss": 0.7431, - "step": 265 - }, - { - "epoch": 0.2, - "learning_rate": 9.362402848507548e-06, - "loss": 0.7431, - "step": 266 - }, - { - "epoch": 0.2, - "learning_rate": 9.356213799130594e-06, - "loss": 0.7181, - "step": 267 - }, - { - "epoch": 0.21, - "learning_rate": 9.349996922750096e-06, - "loss": 0.7467, - "step": 268 - }, - { - "epoch": 0.21, - "learning_rate": 9.343752259078761e-06, - "loss": 0.7489, - "step": 269 - }, - { - "epoch": 0.21, - "learning_rate": 9.337479848006799e-06, - "loss": 0.7277, - "step": 270 - }, - { - "epoch": 0.21, - "learning_rate": 9.331179729601665e-06, - "loss": 0.7474, - "step": 271 - }, - { - "epoch": 0.21, - "learning_rate": 9.324851944107809e-06, - "loss": 0.7171, - "step": 272 - }, - { - "epoch": 0.21, - "learning_rate": 9.318496531946411e-06, - "loss": 0.7664, - "step": 273 - }, - { - "epoch": 0.21, - "learning_rate": 9.312113533715125e-06, - "loss": 0.7243, - "step": 274 - }, - { - "epoch": 0.21, - "learning_rate": 9.305702990187831e-06, - "loss": 0.7242, - "step": 275 - }, - { - "epoch": 0.21, - "learning_rate": 9.299264942314358e-06, - "loss": 0.7237, - "step": 276 - }, - { - "epoch": 0.21, - "learning_rate": 9.292799431220229e-06, - "loss": 0.7108, - "step": 277 - }, - { - "epoch": 0.21, - "learning_rate": 9.286306498206405e-06, - "loss": 0.7467, - "step": 278 - }, - { - "epoch": 0.21, - "learning_rate": 9.27978618474901e-06, - "loss": 0.7445, - "step": 279 - }, - { - "epoch": 0.21, - "learning_rate": 9.273238532499068e-06, - "loss": 0.7154, - "step": 280 - }, - { - "epoch": 0.21, - "learning_rate": 9.266663583282254e-06, - "loss": 0.733, - "step": 281 - }, - { - "epoch": 0.22, - "learning_rate": 9.260061379098596e-06, - "loss": 0.7206, - "step": 282 - }, - { - "epoch": 0.22, - "learning_rate": 9.253431962122234e-06, - "loss": 0.7346, - "step": 283 - }, - { - "epoch": 0.22, - "learning_rate": 9.246775374701139e-06, - "loss": 0.728, - "step": 284 - }, - { - "epoch": 0.22, - "learning_rate": 9.24009165935684e-06, - "loss": 0.7297, - "step": 285 - }, - { - "epoch": 0.22, - "learning_rate": 9.23338085878416e-06, - "loss": 0.7326, - "step": 286 - }, - { - "epoch": 0.22, - "learning_rate": 9.226643015850938e-06, - "loss": 0.7304, - "step": 287 - }, - { - "epoch": 0.22, - "learning_rate": 9.219878173597755e-06, - "loss": 0.7175, - "step": 288 - }, - { - "epoch": 0.22, - "learning_rate": 9.213086375237662e-06, - "loss": 0.7307, - "step": 289 - }, - { - "epoch": 0.22, - "learning_rate": 9.206267664155906e-06, - "loss": 0.7078, - "step": 290 - }, - { - "epoch": 0.22, - "learning_rate": 9.199422083909644e-06, - "loss": 0.7404, - "step": 291 - }, - { - "epoch": 0.22, - "learning_rate": 9.19254967822767e-06, - "loss": 0.7197, - "step": 292 - }, - { - "epoch": 0.22, - "learning_rate": 9.18565049101014e-06, - "loss": 0.7303, - "step": 293 - }, - { - "epoch": 0.22, - "learning_rate": 9.178724566328288e-06, - "loss": 0.7415, - "step": 294 - }, - { - "epoch": 0.23, - "learning_rate": 9.171771948424138e-06, - "loss": 0.7208, - "step": 295 - }, - { - "epoch": 0.23, - "learning_rate": 9.164792681710231e-06, - "loss": 0.7306, - "step": 296 - }, - { - "epoch": 0.23, - "learning_rate": 9.157786810769338e-06, - "loss": 0.7161, - "step": 297 - }, - { - "epoch": 0.23, - "learning_rate": 9.150754380354176e-06, - "loss": 0.7241, - "step": 298 - }, - { - "epoch": 0.23, - "learning_rate": 9.143695435387117e-06, - "loss": 0.7336, - "step": 299 - }, - { - "epoch": 0.23, - "learning_rate": 9.136610020959909e-06, - "loss": 0.7359, - "step": 300 - }, - { - "epoch": 0.23, - "learning_rate": 9.129498182333379e-06, - "loss": 0.7217, - "step": 301 - }, - { - "epoch": 0.23, - "learning_rate": 9.122359964937154e-06, - "loss": 0.7304, - "step": 302 - }, - { - "epoch": 0.23, - "learning_rate": 9.115195414369365e-06, - "loss": 0.7186, - "step": 303 - }, - { - "epoch": 0.23, - "learning_rate": 9.108004576396352e-06, - "loss": 0.7263, - "step": 304 - }, - { - "epoch": 0.23, - "learning_rate": 9.100787496952377e-06, - "loss": 0.7244, - "step": 305 - }, - { - "epoch": 0.23, - "learning_rate": 9.093544222139338e-06, - "loss": 0.7333, - "step": 306 - }, - { - "epoch": 0.23, - "learning_rate": 9.086274798226453e-06, - "loss": 0.7251, - "step": 307 - }, - { - "epoch": 0.24, - "learning_rate": 9.078979271649988e-06, - "loss": 0.7352, - "step": 308 - }, - { - "epoch": 0.24, - "learning_rate": 9.071657689012944e-06, - "loss": 0.7333, - "step": 309 - }, - { - "epoch": 0.24, - "learning_rate": 9.064310097084766e-06, - "loss": 0.7597, - "step": 310 - }, - { - "epoch": 0.24, - "learning_rate": 9.056936542801047e-06, - "loss": 0.7346, - "step": 311 - }, - { - "epoch": 0.24, - "learning_rate": 9.049537073263218e-06, - "loss": 0.7216, - "step": 312 - }, - { - "epoch": 0.24, - "learning_rate": 9.042111735738266e-06, - "loss": 0.7137, - "step": 313 - }, - { - "epoch": 0.24, - "learning_rate": 9.0346605776584e-06, - "loss": 0.7367, - "step": 314 - }, - { - "epoch": 0.24, - "learning_rate": 9.027183646620789e-06, - "loss": 0.7105, - "step": 315 - }, - { - "epoch": 0.24, - "learning_rate": 9.01968099038722e-06, - "loss": 0.7175, - "step": 316 - }, - { - "epoch": 0.24, - "learning_rate": 9.012152656883824e-06, - "loss": 0.7301, - "step": 317 - }, - { - "epoch": 0.24, - "learning_rate": 9.00459869420074e-06, - "loss": 0.741, - "step": 318 - }, - { - "epoch": 0.24, - "learning_rate": 8.99701915059184e-06, - "loss": 0.7401, - "step": 319 - }, - { - "epoch": 0.24, - "learning_rate": 8.98941407447439e-06, - "loss": 0.7176, - "step": 320 - }, - { - "epoch": 0.25, - "learning_rate": 8.981783514428762e-06, - "loss": 0.7192, - "step": 321 - }, - { - "epoch": 0.25, - "learning_rate": 8.974127519198115e-06, - "loss": 0.7188, - "step": 322 - }, - { - "epoch": 0.25, - "learning_rate": 8.966446137688086e-06, - "loss": 0.7165, - "step": 323 - }, - { - "epoch": 0.25, - "learning_rate": 8.958739418966473e-06, - "loss": 0.735, - "step": 324 - }, - { - "epoch": 0.25, - "learning_rate": 8.951007412262928e-06, - "loss": 0.7481, - "step": 325 - }, - { - "epoch": 0.25, - "learning_rate": 8.943250166968645e-06, - "loss": 0.7261, - "step": 326 - }, - { - "epoch": 0.25, - "learning_rate": 8.935467732636027e-06, - "loss": 0.7273, - "step": 327 - }, - { - "epoch": 0.25, - "learning_rate": 8.927660158978392e-06, - "loss": 0.7476, - "step": 328 - }, - { - "epoch": 0.25, - "learning_rate": 8.919827495869645e-06, - "loss": 0.7371, - "step": 329 - }, - { - "epoch": 0.25, - "learning_rate": 8.911969793343953e-06, - "loss": 0.7171, - "step": 330 - }, - { - "epoch": 0.25, - "learning_rate": 8.904087101595435e-06, - "loss": 0.7262, - "step": 331 - }, - { - "epoch": 0.25, - "learning_rate": 8.89617947097784e-06, - "loss": 0.7197, - "step": 332 - }, - { - "epoch": 0.25, - "learning_rate": 8.88824695200422e-06, - "loss": 0.7085, - "step": 333 - }, - { - "epoch": 0.26, - "learning_rate": 8.88028959534662e-06, - "loss": 0.7312, - "step": 334 - }, - { - "epoch": 0.26, - "learning_rate": 8.872307451835733e-06, - "loss": 0.711, - "step": 335 - }, - { - "epoch": 0.26, - "learning_rate": 8.864300572460596e-06, - "loss": 0.728, - "step": 336 - }, - { - "epoch": 0.26, - "learning_rate": 8.85626900836825e-06, - "loss": 0.7274, - "step": 337 - }, - { - "epoch": 0.26, - "learning_rate": 8.84821281086343e-06, - "loss": 0.7384, - "step": 338 - }, - { - "epoch": 0.26, - "learning_rate": 8.84013203140821e-06, - "loss": 0.7394, - "step": 339 - }, - { - "epoch": 0.26, - "learning_rate": 8.832026721621709e-06, - "loss": 0.7384, - "step": 340 - }, - { - "epoch": 0.26, - "learning_rate": 8.823896933279725e-06, - "loss": 0.7205, - "step": 341 - }, - { - "epoch": 0.26, - "learning_rate": 8.815742718314438e-06, - "loss": 0.7282, - "step": 342 - }, - { - "epoch": 0.26, - "learning_rate": 8.807564128814052e-06, - "loss": 0.7218, - "step": 343 - }, - { - "epoch": 0.26, - "learning_rate": 8.799361217022478e-06, - "loss": 0.707, - "step": 344 - }, - { - "epoch": 0.26, - "learning_rate": 8.791134035338991e-06, - "loss": 0.7184, - "step": 345 - }, - { - "epoch": 0.26, - "learning_rate": 8.782882636317904e-06, - "loss": 0.7312, - "step": 346 - }, - { - "epoch": 0.27, - "learning_rate": 8.774607072668225e-06, - "loss": 0.7238, - "step": 347 - }, - { - "epoch": 0.27, - "learning_rate": 8.766307397253325e-06, - "loss": 0.7235, - "step": 348 - }, - { - "epoch": 0.27, - "learning_rate": 8.757983663090593e-06, - "loss": 0.7208, - "step": 349 - }, - { - "epoch": 0.27, - "learning_rate": 8.749635923351108e-06, - "loss": 0.7349, - "step": 350 - }, - { - "epoch": 0.27, - "learning_rate": 8.741264231359293e-06, - "loss": 0.7305, - "step": 351 - }, - { - "epoch": 0.27, - "learning_rate": 8.732868640592573e-06, - "loss": 0.7351, - "step": 352 - }, - { - "epoch": 0.27, - "learning_rate": 8.724449204681036e-06, - "loss": 0.7337, - "step": 353 - }, - { - "epoch": 0.27, - "learning_rate": 8.716005977407094e-06, - "loss": 0.7212, - "step": 354 - }, - { - "epoch": 0.27, - "learning_rate": 8.707539012705133e-06, - "loss": 0.729, - "step": 355 - }, - { - "epoch": 0.27, - "learning_rate": 8.699048364661167e-06, - "loss": 0.721, - "step": 356 - }, - { - "epoch": 0.27, - "learning_rate": 8.690534087512502e-06, - "loss": 0.7261, - "step": 357 - }, - { - "epoch": 0.27, - "learning_rate": 8.681996235647385e-06, - "loss": 0.7331, - "step": 358 - }, - { - "epoch": 0.27, - "learning_rate": 8.67343486360465e-06, - "loss": 0.7045, - "step": 359 - }, - { - "epoch": 0.28, - "learning_rate": 8.664850026073376e-06, - "loss": 0.7315, - "step": 360 - }, - { - "epoch": 0.28, - "learning_rate": 8.656241777892544e-06, - "loss": 0.7326, - "step": 361 - }, - { - "epoch": 0.28, - "learning_rate": 8.647610174050672e-06, - "loss": 0.7477, - "step": 362 - }, - { - "epoch": 0.28, - "learning_rate": 8.638955269685475e-06, - "loss": 0.7128, - "step": 363 - }, - { - "epoch": 0.28, - "learning_rate": 8.630277120083508e-06, - "loss": 0.7217, - "step": 364 - }, - { - "epoch": 0.28, - "learning_rate": 8.621575780679814e-06, - "loss": 0.7161, - "step": 365 - }, - { - "epoch": 0.28, - "learning_rate": 8.612851307057571e-06, - "loss": 0.7037, - "step": 366 - }, - { - "epoch": 0.28, - "learning_rate": 8.604103754947731e-06, - "loss": 0.7301, - "step": 367 - }, - { - "epoch": 0.28, - "learning_rate": 8.595333180228676e-06, - "loss": 0.724, - "step": 368 - }, - { - "epoch": 0.28, - "learning_rate": 8.586539638925851e-06, - "loss": 0.7403, - "step": 369 - }, - { - "epoch": 0.28, - "learning_rate": 8.577723187211404e-06, - "loss": 0.7171, - "step": 370 - }, - { - "epoch": 0.28, - "learning_rate": 8.56888388140384e-06, - "loss": 0.7115, - "step": 371 - }, - { - "epoch": 0.28, - "learning_rate": 8.56002177796765e-06, - "loss": 0.7171, - "step": 372 - }, - { - "epoch": 0.29, - "learning_rate": 8.551136933512952e-06, - "loss": 0.7378, - "step": 373 - }, - { - "epoch": 0.29, - "learning_rate": 8.542229404795133e-06, - "loss": 0.7304, - "step": 374 - }, - { - "epoch": 0.29, - "learning_rate": 8.533299248714483e-06, - "loss": 0.722, - "step": 375 - }, - { - "epoch": 0.29, - "learning_rate": 8.524346522315836e-06, - "loss": 0.7333, - "step": 376 - }, - { - "epoch": 0.29, - "learning_rate": 8.515371282788201e-06, - "loss": 0.7387, - "step": 377 - }, - { - "epoch": 0.29, - "learning_rate": 8.506373587464396e-06, - "loss": 0.7309, - "step": 378 - }, - { - "epoch": 0.29, - "learning_rate": 8.497353493820688e-06, - "loss": 0.7183, - "step": 379 - }, - { - "epoch": 0.29, - "learning_rate": 8.488311059476419e-06, - "loss": 0.7508, - "step": 380 - }, - { - "epoch": 0.29, - "learning_rate": 8.479246342193643e-06, - "loss": 0.7139, - "step": 381 - }, - { - "epoch": 0.29, - "learning_rate": 8.470159399876753e-06, - "loss": 0.7217, - "step": 382 - }, - { - "epoch": 0.29, - "learning_rate": 8.461050290572114e-06, - "loss": 0.7212, - "step": 383 - }, - { - "epoch": 0.29, - "learning_rate": 8.451919072467694e-06, - "loss": 0.7302, - "step": 384 - }, - { - "epoch": 0.29, - "eval_loss": 1.2867591381072998, - "eval_runtime": 125.743, - "eval_samples_per_second": 95.186, - "eval_steps_per_second": 23.803, - "step": 384 - }, - { - "epoch": 0.29, - "learning_rate": 8.442765803892683e-06, - "loss": 0.7221, - "step": 385 - }, - { - "epoch": 0.3, - "learning_rate": 8.433590543317132e-06, - "loss": 0.7119, - "step": 386 - }, - { - "epoch": 0.3, - "learning_rate": 8.424393349351573e-06, - "loss": 0.7325, - "step": 387 - }, - { - "epoch": 0.3, - "learning_rate": 8.415174280746645e-06, - "loss": 0.7297, - "step": 388 - }, - { - "epoch": 0.3, - "learning_rate": 8.405933396392722e-06, - "loss": 0.7269, - "step": 389 - }, - { - "epoch": 0.3, - "learning_rate": 8.396670755319537e-06, - "loss": 0.7146, - "step": 390 - }, - { - "epoch": 0.3, - "learning_rate": 8.38738641669579e-06, - "loss": 0.6906, - "step": 391 - }, - { - "epoch": 0.3, - "learning_rate": 8.378080439828799e-06, - "loss": 0.7537, - "step": 392 - }, - { - "epoch": 0.3, - "learning_rate": 8.368752884164096e-06, - "loss": 0.7129, - "step": 393 - }, - { - "epoch": 0.3, - "learning_rate": 8.359403809285054e-06, - "loss": 0.7295, - "step": 394 - }, - { - "epoch": 0.3, - "learning_rate": 8.350033274912512e-06, - "loss": 0.7289, - "step": 395 - }, - { - "epoch": 0.3, - "learning_rate": 8.340641340904391e-06, - "loss": 0.7383, - "step": 396 - }, - { - "epoch": 0.3, - "learning_rate": 8.33122806725531e-06, - "loss": 0.724, - "step": 397 - }, - { - "epoch": 0.3, - "learning_rate": 8.321793514096195e-06, - "loss": 0.7146, - "step": 398 - }, - { - "epoch": 0.31, - "learning_rate": 8.312337741693917e-06, - "loss": 0.7437, - "step": 399 - }, - { - "epoch": 0.31, - "learning_rate": 8.30286081045088e-06, - "loss": 0.7366, - "step": 400 - }, - { - "epoch": 0.31, - "learning_rate": 8.293362780904662e-06, - "loss": 0.719, - "step": 401 - }, - { - "epoch": 0.31, - "learning_rate": 8.2838437137276e-06, - "loss": 0.7085, - "step": 402 - }, - { - "epoch": 0.31, - "learning_rate": 8.274303669726427e-06, - "loss": 0.7247, - "step": 403 - }, - { - "epoch": 0.31, - "learning_rate": 8.264742709841869e-06, - "loss": 0.7299, - "step": 404 - }, - { - "epoch": 0.31, - "learning_rate": 8.255160895148263e-06, - "loss": 0.7367, - "step": 405 - }, - { - "epoch": 0.31, - "learning_rate": 8.245558286853164e-06, - "loss": 0.706, - "step": 406 - }, - { - "epoch": 0.31, - "learning_rate": 8.23593494629695e-06, - "loss": 0.7351, - "step": 407 - }, - { - "epoch": 0.31, - "learning_rate": 8.226290934952442e-06, - "loss": 0.7173, - "step": 408 - }, - { - "epoch": 0.31, - "learning_rate": 8.216626314424496e-06, - "loss": 0.7385, - "step": 409 - }, - { - "epoch": 0.31, - "learning_rate": 8.206941146449621e-06, - "loss": 0.7226, - "step": 410 - }, - { - "epoch": 0.31, - "learning_rate": 8.19723549289558e-06, - "loss": 0.7345, - "step": 411 - }, - { - "epoch": 0.32, - "learning_rate": 8.187509415760996e-06, - "loss": 0.7178, - "step": 412 - }, - { - "epoch": 0.32, - "learning_rate": 8.177762977174956e-06, - "loss": 0.7214, - "step": 413 - }, - { - "epoch": 0.32, - "learning_rate": 8.167996239396611e-06, - "loss": 0.7212, - "step": 414 - }, - { - "epoch": 0.32, - "learning_rate": 8.158209264814785e-06, - "loss": 0.7257, - "step": 415 - }, - { - "epoch": 0.32, - "learning_rate": 8.14840211594757e-06, - "loss": 0.6969, - "step": 416 - }, - { - "epoch": 0.32, - "learning_rate": 8.138574855441929e-06, - "loss": 0.7235, - "step": 417 - }, - { - "epoch": 0.32, - "learning_rate": 8.128727546073296e-06, - "loss": 0.7091, - "step": 418 - }, - { - "epoch": 0.32, - "learning_rate": 8.118860250745173e-06, - "loss": 0.7068, - "step": 419 - }, - { - "epoch": 0.32, - "learning_rate": 8.108973032488736e-06, - "loss": 0.7238, - "step": 420 - }, - { - "epoch": 0.32, - "learning_rate": 8.099065954462422e-06, - "loss": 0.712, - "step": 421 - }, - { - "epoch": 0.32, - "learning_rate": 8.089139079951526e-06, - "loss": 0.7036, - "step": 422 - }, - { - "epoch": 0.32, - "learning_rate": 8.079192472367812e-06, - "loss": 0.7038, - "step": 423 - }, - { - "epoch": 0.32, - "learning_rate": 8.069226195249087e-06, - "loss": 0.7143, - "step": 424 - }, - { - "epoch": 0.33, - "learning_rate": 8.059240312258806e-06, - "loss": 0.7213, - "step": 425 - }, - { - "epoch": 0.33, - "learning_rate": 8.049234887185667e-06, - "loss": 0.7113, - "step": 426 - }, - { - "epoch": 0.33, - "learning_rate": 8.039209983943201e-06, - "loss": 0.7297, - "step": 427 - }, - { - "epoch": 0.33, - "learning_rate": 8.029165666569361e-06, - "loss": 0.7215, - "step": 428 - }, - { - "epoch": 0.33, - "learning_rate": 8.019101999226115e-06, - "loss": 0.7124, - "step": 429 - }, - { - "epoch": 0.33, - "learning_rate": 8.009019046199036e-06, - "loss": 0.7121, - "step": 430 - }, - { - "epoch": 0.33, - "learning_rate": 7.998916871896899e-06, - "loss": 0.738, - "step": 431 - }, - { - "epoch": 0.33, - "learning_rate": 7.988795540851252e-06, - "loss": 0.7132, - "step": 432 - }, - { - "epoch": 0.33, - "learning_rate": 7.978655117716018e-06, - "loss": 0.7056, - "step": 433 - }, - { - "epoch": 0.33, - "learning_rate": 7.968495667267084e-06, - "loss": 0.7335, - "step": 434 - }, - { - "epoch": 0.33, - "learning_rate": 7.958317254401871e-06, - "loss": 0.6961, - "step": 435 - }, - { - "epoch": 0.33, - "learning_rate": 7.948119944138939e-06, - "loss": 0.7181, - "step": 436 - }, - { - "epoch": 0.33, - "learning_rate": 7.937903801617555e-06, - "loss": 0.7185, - "step": 437 - }, - { - "epoch": 0.34, - "learning_rate": 7.927668892097288e-06, - "loss": 0.6965, - "step": 438 - }, - { - "epoch": 0.34, - "learning_rate": 7.917415280957592e-06, - "loss": 0.7143, - "step": 439 - }, - { - "epoch": 0.34, - "learning_rate": 7.907143033697378e-06, - "loss": 0.6857, - "step": 440 - }, - { - "epoch": 0.34, - "learning_rate": 7.896852215934605e-06, - "loss": 0.7419, - "step": 441 - }, - { - "epoch": 0.34, - "learning_rate": 7.886542893405861e-06, - "loss": 0.7311, - "step": 442 - }, - { - "epoch": 0.34, - "learning_rate": 7.876215131965938e-06, - "loss": 0.7175, - "step": 443 - }, - { - "epoch": 0.34, - "learning_rate": 7.865868997587414e-06, - "loss": 0.7144, - "step": 444 - }, - { - "epoch": 0.34, - "learning_rate": 7.855504556360227e-06, - "loss": 0.7097, - "step": 445 - }, - { - "epoch": 0.34, - "learning_rate": 7.845121874491266e-06, - "loss": 0.7379, - "step": 446 - }, - { - "epoch": 0.34, - "learning_rate": 7.834721018303935e-06, - "loss": 0.7051, - "step": 447 - }, - { - "epoch": 0.34, - "learning_rate": 7.82430205423773e-06, - "loss": 0.7274, - "step": 448 - }, - { - "epoch": 0.34, - "learning_rate": 7.81386504884782e-06, - "loss": 0.7252, - "step": 449 - }, - { - "epoch": 0.34, - "learning_rate": 7.803410068804625e-06, - "loss": 0.7231, - "step": 450 - }, - { - "epoch": 0.34, - "learning_rate": 7.792937180893377e-06, - "loss": 0.7079, - "step": 451 - }, - { - "epoch": 0.35, - "learning_rate": 7.782446452013709e-06, - "loss": 0.7275, - "step": 452 - }, - { - "epoch": 0.35, - "learning_rate": 7.771937949179214e-06, - "loss": 0.7096, - "step": 453 - }, - { - "epoch": 0.35, - "learning_rate": 7.761411739517026e-06, - "loss": 0.7233, - "step": 454 - }, - { - "epoch": 0.35, - "learning_rate": 7.750867890267391e-06, - "loss": 0.7055, - "step": 455 - }, - { - "epoch": 0.35, - "learning_rate": 7.740306468783226e-06, - "loss": 0.7185, - "step": 456 - }, - { - "epoch": 0.35, - "learning_rate": 7.729727542529707e-06, - "loss": 0.7363, - "step": 457 - }, - { - "epoch": 0.35, - "learning_rate": 7.719131179083822e-06, - "loss": 0.7054, - "step": 458 - }, - { - "epoch": 0.35, - "learning_rate": 7.708517446133951e-06, - "loss": 0.7074, - "step": 459 - }, - { - "epoch": 0.35, - "learning_rate": 7.697886411479422e-06, - "loss": 0.7043, - "step": 460 - }, - { - "epoch": 0.35, - "learning_rate": 7.687238143030094e-06, - "loss": 0.7194, - "step": 461 - }, - { - "epoch": 0.35, - "learning_rate": 7.676572708805902e-06, - "loss": 0.707, - "step": 462 - }, - { - "epoch": 0.35, - "learning_rate": 7.665890176936441e-06, - "loss": 0.7249, - "step": 463 - }, - { - "epoch": 0.35, - "learning_rate": 7.655190615660524e-06, - "loss": 0.7069, - "step": 464 - }, - { - "epoch": 0.36, - "learning_rate": 7.644474093325743e-06, - "loss": 0.7066, - "step": 465 - }, - { - "epoch": 0.36, - "learning_rate": 7.633740678388036e-06, - "loss": 0.73, - "step": 466 - }, - { - "epoch": 0.36, - "learning_rate": 7.622990439411251e-06, - "loss": 0.6964, - "step": 467 - }, - { - "epoch": 0.36, - "learning_rate": 7.612223445066706e-06, - "loss": 0.7315, - "step": 468 - }, - { - "epoch": 0.36, - "learning_rate": 7.6014397641327474e-06, - "loss": 0.7227, - "step": 469 - }, - { - "epoch": 0.36, - "learning_rate": 7.590639465494316e-06, - "loss": 0.722, - "step": 470 - }, - { - "epoch": 0.36, - "learning_rate": 7.579822618142505e-06, - "loss": 0.7132, - "step": 471 - }, - { - "epoch": 0.36, - "learning_rate": 7.568989291174118e-06, - "loss": 0.7097, - "step": 472 - }, - { - "epoch": 0.36, - "learning_rate": 7.558139553791227e-06, - "loss": 0.7008, - "step": 473 - }, - { - "epoch": 0.36, - "learning_rate": 7.547273475300734e-06, - "loss": 0.6987, - "step": 474 - }, - { - "epoch": 0.36, - "learning_rate": 7.536391125113926e-06, - "loss": 0.688, - "step": 475 - }, - { - "epoch": 0.36, - "learning_rate": 7.5254925727460335e-06, - "loss": 0.7042, - "step": 476 - }, - { - "epoch": 0.36, - "learning_rate": 7.514577887815779e-06, - "loss": 0.7144, - "step": 477 - }, - { - "epoch": 0.37, - "learning_rate": 7.503647140044943e-06, - "loss": 0.7026, - "step": 478 - }, - { - "epoch": 0.37, - "learning_rate": 7.492700399257913e-06, - "loss": 0.7263, - "step": 479 - }, - { - "epoch": 0.37, - "learning_rate": 7.481737735381236e-06, - "loss": 0.7163, - "step": 480 - }, - { - "epoch": 0.37, - "learning_rate": 7.470759218443177e-06, - "loss": 0.6961, - "step": 481 - }, - { - "epoch": 0.37, - "learning_rate": 7.459764918573264e-06, - "loss": 0.6972, - "step": 482 - }, - { - "epoch": 0.37, - "learning_rate": 7.44875490600185e-06, - "loss": 0.7343, - "step": 483 - }, - { - "epoch": 0.37, - "learning_rate": 7.437729251059657e-06, - "loss": 0.7344, - "step": 484 - }, - { - "epoch": 0.37, - "learning_rate": 7.426688024177326e-06, - "loss": 0.7145, - "step": 485 - }, - { - "epoch": 0.37, - "learning_rate": 7.415631295884972e-06, - "loss": 0.6957, - "step": 486 - }, - { - "epoch": 0.37, - "learning_rate": 7.40455913681173e-06, - "loss": 0.7303, - "step": 487 - }, - { - "epoch": 0.37, - "learning_rate": 7.393471617685308e-06, - "loss": 0.712, - "step": 488 - }, - { - "epoch": 0.37, - "learning_rate": 7.382368809331533e-06, - "loss": 0.6974, - "step": 489 - }, - { - "epoch": 0.37, - "learning_rate": 7.37125078267389e-06, - "loss": 0.7112, - "step": 490 - }, - { - "epoch": 0.38, - "learning_rate": 7.360117608733084e-06, - "loss": 0.7222, - "step": 491 - }, - { - "epoch": 0.38, - "learning_rate": 7.3489693586265785e-06, - "loss": 0.7211, - "step": 492 - }, - { - "epoch": 0.38, - "learning_rate": 7.3378061035681415e-06, - "loss": 0.7099, - "step": 493 - }, - { - "epoch": 0.38, - "learning_rate": 7.326627914867388e-06, - "loss": 0.6936, - "step": 494 - }, - { - "epoch": 0.38, - "learning_rate": 7.315434863929332e-06, - "loss": 0.7165, - "step": 495 - }, - { - "epoch": 0.38, - "learning_rate": 7.304227022253918e-06, - "loss": 0.7313, - "step": 496 - }, - { - "epoch": 0.38, - "learning_rate": 7.2930044614355844e-06, - "loss": 0.7184, - "step": 497 - }, - { - "epoch": 0.38, - "learning_rate": 7.281767253162781e-06, - "loss": 0.721, - "step": 498 - }, - { - "epoch": 0.38, - "learning_rate": 7.2705154692175335e-06, - "loss": 0.7114, - "step": 499 - }, - { - "epoch": 0.38, - "learning_rate": 7.25924918147497e-06, - "loss": 0.7219, - "step": 500 - }, - { - "epoch": 0.38, - "learning_rate": 7.24796846190287e-06, - "loss": 0.7179, - "step": 501 - }, - { - "epoch": 0.38, - "learning_rate": 7.236673382561199e-06, - "loss": 0.7096, - "step": 502 - }, - { - "epoch": 0.38, - "learning_rate": 7.225364015601655e-06, - "loss": 0.7087, - "step": 503 - }, - { - "epoch": 0.39, - "learning_rate": 7.2140404332671986e-06, - "loss": 0.7304, - "step": 504 - }, - { - "epoch": 0.39, - "learning_rate": 7.2027027078916e-06, - "loss": 0.7258, - "step": 505 - }, - { - "epoch": 0.39, - "learning_rate": 7.191350911898975e-06, - "loss": 0.7023, - "step": 506 - }, - { - "epoch": 0.39, - "learning_rate": 7.17998511780332e-06, - "loss": 0.678, - "step": 507 - }, - { - "epoch": 0.39, - "learning_rate": 7.1686053982080454e-06, - "loss": 0.6853, - "step": 508 - }, - { - "epoch": 0.39, - "learning_rate": 7.157211825805519e-06, - "loss": 0.7079, - "step": 509 - }, - { - "epoch": 0.39, - "learning_rate": 7.1458044733766026e-06, - "loss": 0.7136, - "step": 510 - }, - { - "epoch": 0.39, - "learning_rate": 7.134383413790178e-06, - "loss": 0.7125, - "step": 511 - }, - { - "epoch": 0.39, - "learning_rate": 7.122948720002689e-06, - "loss": 0.6938, - "step": 512 - }, - { - "epoch": 0.39, - "eval_loss": 1.2771650552749634, - "eval_runtime": 125.1558, - "eval_samples_per_second": 95.633, - "eval_steps_per_second": 23.914, - "step": 512 - }, - { - "epoch": 0.39, - "learning_rate": 7.111500465057673e-06, - "loss": 0.7068, - "step": 513 - }, - { - "epoch": 0.39, - "learning_rate": 7.100038722085291e-06, - "loss": 0.7007, - "step": 514 - }, - { - "epoch": 0.39, - "learning_rate": 7.088563564301874e-06, - "loss": 0.7191, - "step": 515 - }, - { - "epoch": 0.39, - "learning_rate": 7.0770750650094335e-06, - "loss": 0.7086, - "step": 516 - }, - { - "epoch": 0.4, - "learning_rate": 7.065573297595211e-06, - "loss": 0.7097, - "step": 517 - }, - { - "epoch": 0.4, - "learning_rate": 7.054058335531203e-06, - "loss": 0.7117, - "step": 518 - }, - { - "epoch": 0.4, - "learning_rate": 7.0425302523736906e-06, - "loss": 0.715, - "step": 519 - }, - { - "epoch": 0.4, - "learning_rate": 7.030989121762773e-06, - "loss": 0.6838, - "step": 520 - }, - { - "epoch": 0.4, - "learning_rate": 7.019435017421894e-06, - "loss": 0.7068, - "step": 521 - }, - { - "epoch": 0.4, - "learning_rate": 7.0078680131573704e-06, - "loss": 0.7363, - "step": 522 - }, - { - "epoch": 0.4, - "learning_rate": 6.996288182857924e-06, - "loss": 0.7056, - "step": 523 - }, - { - "epoch": 0.4, - "learning_rate": 6.98469560049421e-06, - "loss": 0.717, - "step": 524 - }, - { - "epoch": 0.4, - "learning_rate": 6.9730903401183384e-06, - "loss": 0.73, - "step": 525 - }, - { - "epoch": 0.4, - "learning_rate": 6.961472475863406e-06, - "loss": 0.7257, - "step": 526 - }, - { - "epoch": 0.4, - "learning_rate": 6.949842081943021e-06, - "loss": 0.7016, - "step": 527 - }, - { - "epoch": 0.4, - "learning_rate": 6.938199232650834e-06, - "loss": 0.7329, - "step": 528 - }, - { - "epoch": 0.4, - "learning_rate": 6.926544002360055e-06, - "loss": 0.7395, - "step": 529 - }, - { - "epoch": 0.41, - "learning_rate": 6.914876465522984e-06, - "loss": 0.7116, - "step": 530 - }, - { - "epoch": 0.41, - "learning_rate": 6.9031966966705326e-06, - "loss": 0.7127, - "step": 531 - }, - { - "epoch": 0.41, - "learning_rate": 6.89150477041175e-06, - "loss": 0.7076, - "step": 532 - }, - { - "epoch": 0.41, - "learning_rate": 6.879800761433345e-06, - "loss": 0.7042, - "step": 533 - }, - { - "epoch": 0.41, - "learning_rate": 6.868084744499211e-06, - "loss": 0.7058, - "step": 534 - }, - { - "epoch": 0.41, - "learning_rate": 6.856356794449945e-06, - "loss": 0.7024, - "step": 535 - }, - { - "epoch": 0.41, - "learning_rate": 6.844616986202371e-06, - "loss": 0.7031, - "step": 536 - }, - { - "epoch": 0.41, - "learning_rate": 6.832865394749065e-06, - "loss": 0.6902, - "step": 537 - }, - { - "epoch": 0.41, - "learning_rate": 6.821102095157871e-06, - "loss": 0.6961, - "step": 538 - }, - { - "epoch": 0.41, - "learning_rate": 6.809327162571422e-06, - "loss": 0.7104, - "step": 539 - }, - { - "epoch": 0.41, - "learning_rate": 6.797540672206661e-06, - "loss": 0.7076, - "step": 540 - }, - { - "epoch": 0.41, - "learning_rate": 6.785742699354365e-06, - "loss": 0.7202, - "step": 541 - }, - { - "epoch": 0.41, - "learning_rate": 6.773933319378656e-06, - "loss": 0.6869, - "step": 542 - }, - { - "epoch": 0.42, - "learning_rate": 6.7621126077165235e-06, - "loss": 0.7141, - "step": 543 - }, - { - "epoch": 0.42, - "learning_rate": 6.750280639877343e-06, - "loss": 0.7161, - "step": 544 - }, - { - "epoch": 0.42, - "learning_rate": 6.738437491442395e-06, - "loss": 0.6855, - "step": 545 - }, - { - "epoch": 0.42, - "learning_rate": 6.726583238064379e-06, - "loss": 0.6893, - "step": 546 - }, - { - "epoch": 0.42, - "learning_rate": 6.714717955466931e-06, - "loss": 0.7095, - "step": 547 - }, - { - "epoch": 0.42, - "learning_rate": 6.702841719444141e-06, - "loss": 0.7148, - "step": 548 - }, - { - "epoch": 0.42, - "learning_rate": 6.690954605860068e-06, - "loss": 0.7168, - "step": 549 - }, - { - "epoch": 0.42, - "learning_rate": 6.679056690648256e-06, - "loss": 0.703, - "step": 550 - }, - { - "epoch": 0.42, - "learning_rate": 6.667148049811249e-06, - "loss": 0.7157, - "step": 551 - }, - { - "epoch": 0.42, - "learning_rate": 6.655228759420105e-06, - "loss": 0.6975, - "step": 552 - }, - { - "epoch": 0.42, - "learning_rate": 6.643298895613909e-06, - "loss": 0.7071, - "step": 553 - }, - { - "epoch": 0.42, - "learning_rate": 6.631358534599288e-06, - "loss": 0.699, - "step": 554 - }, - { - "epoch": 0.42, - "learning_rate": 6.619407752649926e-06, - "loss": 0.7278, - "step": 555 - }, - { - "epoch": 0.43, - "learning_rate": 6.607446626106071e-06, - "loss": 0.7217, - "step": 556 - }, - { - "epoch": 0.43, - "learning_rate": 6.595475231374057e-06, - "loss": 0.7182, - "step": 557 - }, - { - "epoch": 0.43, - "learning_rate": 6.583493644925803e-06, - "loss": 0.7345, - "step": 558 - }, - { - "epoch": 0.43, - "learning_rate": 6.571501943298335e-06, - "loss": 0.7009, - "step": 559 - }, - { - "epoch": 0.43, - "learning_rate": 6.559500203093296e-06, - "loss": 0.7243, - "step": 560 - }, - { - "epoch": 0.43, - "learning_rate": 6.547488500976451e-06, - "loss": 0.722, - "step": 561 - }, - { - "epoch": 0.43, - "learning_rate": 6.535466913677201e-06, - "loss": 0.7009, - "step": 562 - }, - { - "epoch": 0.43, - "learning_rate": 6.52343551798809e-06, - "loss": 0.7066, - "step": 563 - }, - { - "epoch": 0.43, - "learning_rate": 6.511394390764326e-06, - "loss": 0.7046, - "step": 564 - }, - { - "epoch": 0.43, - "learning_rate": 6.4993436089232705e-06, - "loss": 0.6972, - "step": 565 - }, - { - "epoch": 0.43, - "learning_rate": 6.487283249443963e-06, - "loss": 0.7082, - "step": 566 - }, - { - "epoch": 0.43, - "learning_rate": 6.475213389366623e-06, - "loss": 0.7205, - "step": 567 - }, - { - "epoch": 0.43, - "learning_rate": 6.463134105792155e-06, - "loss": 0.7044, - "step": 568 - }, - { - "epoch": 0.44, - "learning_rate": 6.451045475881668e-06, - "loss": 0.6977, - "step": 569 - }, - { - "epoch": 0.44, - "learning_rate": 6.4389475768559675e-06, - "loss": 0.697, - "step": 570 - }, - { - "epoch": 0.44, - "learning_rate": 6.426840485995072e-06, - "loss": 0.6949, - "step": 571 - }, - { - "epoch": 0.44, - "learning_rate": 6.414724280637713e-06, - "loss": 0.6855, - "step": 572 - }, - { - "epoch": 0.44, - "learning_rate": 6.402599038180849e-06, - "loss": 0.709, - "step": 573 - }, - { - "epoch": 0.44, - "learning_rate": 6.3904648360791645e-06, - "loss": 0.7135, - "step": 574 - }, - { - "epoch": 0.44, - "learning_rate": 6.378321751844578e-06, - "loss": 0.7113, - "step": 575 - }, - { - "epoch": 0.44, - "learning_rate": 6.36616986304574e-06, - "loss": 0.7191, - "step": 576 - }, - { - "epoch": 0.44, - "learning_rate": 6.354009247307556e-06, - "loss": 0.7105, - "step": 577 - }, - { - "epoch": 0.44, - "learning_rate": 6.341839982310665e-06, - "loss": 0.6997, - "step": 578 - }, - { - "epoch": 0.44, - "learning_rate": 6.329662145790964e-06, - "loss": 0.703, - "step": 579 - }, - { - "epoch": 0.44, - "learning_rate": 6.317475815539104e-06, - "loss": 0.6934, - "step": 580 - }, - { - "epoch": 0.44, - "learning_rate": 6.305281069399989e-06, - "loss": 0.7149, - "step": 581 - }, - { - "epoch": 0.45, - "learning_rate": 6.293077985272288e-06, - "loss": 0.6968, - "step": 582 - }, - { - "epoch": 0.45, - "learning_rate": 6.280866641107926e-06, - "loss": 0.7212, - "step": 583 - }, - { - "epoch": 0.45, - "learning_rate": 6.268647114911599e-06, - "loss": 0.7255, - "step": 584 - }, - { - "epoch": 0.45, - "learning_rate": 6.25641948474026e-06, - "loss": 0.7096, - "step": 585 - }, - { - "epoch": 0.45, - "learning_rate": 6.244183828702639e-06, - "loss": 0.6911, - "step": 586 - }, - { - "epoch": 0.45, - "learning_rate": 6.23194022495873e-06, - "loss": 0.7146, - "step": 587 - }, - { - "epoch": 0.45, - "learning_rate": 6.219688751719293e-06, - "loss": 0.7078, - "step": 588 - }, - { - "epoch": 0.45, - "learning_rate": 6.2074294872453626e-06, - "loss": 0.7111, - "step": 589 - }, - { - "epoch": 0.45, - "learning_rate": 6.195162509847738e-06, - "loss": 0.7068, - "step": 590 - }, - { - "epoch": 0.45, - "learning_rate": 6.182887897886495e-06, - "loss": 0.7055, - "step": 591 - }, - { - "epoch": 0.45, - "learning_rate": 6.17060572977047e-06, - "loss": 0.7064, - "step": 592 - }, - { - "epoch": 0.45, - "learning_rate": 6.158316083956772e-06, - "loss": 0.6935, - "step": 593 - }, - { - "epoch": 0.45, - "learning_rate": 6.146019038950275e-06, - "loss": 0.7179, - "step": 594 - }, - { - "epoch": 0.46, - "learning_rate": 6.13371467330312e-06, - "loss": 0.7198, - "step": 595 - }, - { - "epoch": 0.46, - "learning_rate": 6.121403065614207e-06, - "loss": 0.7302, - "step": 596 - }, - { - "epoch": 0.46, - "learning_rate": 6.109084294528704e-06, - "loss": 0.7056, - "step": 597 - }, - { - "epoch": 0.46, - "learning_rate": 6.096758438737534e-06, - "loss": 0.7166, - "step": 598 - }, - { - "epoch": 0.46, - "learning_rate": 6.084425576976873e-06, - "loss": 0.7053, - "step": 599 - }, - { - "epoch": 0.46, - "learning_rate": 6.072085788027659e-06, - "loss": 0.6997, - "step": 600 - }, - { - "epoch": 0.46, - "learning_rate": 6.059739150715073e-06, - "loss": 0.7226, - "step": 601 - }, - { - "epoch": 0.46, - "learning_rate": 6.047385743908045e-06, - "loss": 0.7111, - "step": 602 - }, - { - "epoch": 0.46, - "learning_rate": 6.035025646518747e-06, - "loss": 0.7113, - "step": 603 - }, - { - "epoch": 0.46, - "learning_rate": 6.02265893750209e-06, - "loss": 0.7229, - "step": 604 - }, - { - "epoch": 0.46, - "learning_rate": 6.010285695855222e-06, - "loss": 0.7102, - "step": 605 - }, - { - "epoch": 0.46, - "learning_rate": 5.9979060006170164e-06, - "loss": 0.7292, - "step": 606 - }, - { - "epoch": 0.46, - "learning_rate": 5.9855199308675724e-06, - "loss": 0.7001, - "step": 607 - }, - { - "epoch": 0.47, - "learning_rate": 5.973127565727711e-06, - "loss": 0.7106, - "step": 608 - }, - { - "epoch": 0.47, - "learning_rate": 5.960728984358466e-06, - "loss": 0.7014, - "step": 609 - }, - { - "epoch": 0.47, - "learning_rate": 5.948324265960581e-06, - "loss": 0.7062, - "step": 610 - }, - { - "epoch": 0.47, - "learning_rate": 5.935913489774e-06, - "loss": 0.7019, - "step": 611 - }, - { - "epoch": 0.47, - "learning_rate": 5.923496735077364e-06, - "loss": 0.6939, - "step": 612 - }, - { - "epoch": 0.47, - "learning_rate": 5.9110740811875055e-06, - "loss": 0.7224, - "step": 613 - }, - { - "epoch": 0.47, - "learning_rate": 5.898645607458941e-06, - "loss": 0.7202, - "step": 614 - }, - { - "epoch": 0.47, - "learning_rate": 5.886211393283359e-06, - "loss": 0.7187, - "step": 615 - }, - { - "epoch": 0.47, - "learning_rate": 5.873771518089124e-06, - "loss": 0.7084, - "step": 616 - }, - { - "epoch": 0.47, - "learning_rate": 5.861326061340756e-06, - "loss": 0.7274, - "step": 617 - }, - { - "epoch": 0.47, - "learning_rate": 5.848875102538435e-06, - "loss": 0.7115, - "step": 618 - }, - { - "epoch": 0.47, - "learning_rate": 5.836418721217483e-06, - "loss": 0.6843, - "step": 619 - }, - { - "epoch": 0.47, - "learning_rate": 5.823956996947861e-06, - "loss": 0.7063, - "step": 620 - }, - { - "epoch": 0.48, - "learning_rate": 5.811490009333662e-06, - "loss": 0.7013, - "step": 621 - }, - { - "epoch": 0.48, - "learning_rate": 5.7990178380126026e-06, - "loss": 0.7141, - "step": 622 - }, - { - "epoch": 0.48, - "learning_rate": 5.786540562655507e-06, - "loss": 0.7127, - "step": 623 - }, - { - "epoch": 0.48, - "learning_rate": 5.774058262965806e-06, - "loss": 0.7009, - "step": 624 - }, - { - "epoch": 0.48, - "learning_rate": 5.761571018679025e-06, - "loss": 0.7071, - "step": 625 - }, - { - "epoch": 0.48, - "learning_rate": 5.749078909562279e-06, - "loss": 0.7211, - "step": 626 - }, - { - "epoch": 0.48, - "learning_rate": 5.736582015413752e-06, - "loss": 0.701, - "step": 627 - }, - { - "epoch": 0.48, - "learning_rate": 5.724080416062198e-06, - "loss": 0.691, - "step": 628 - }, - { - "epoch": 0.48, - "learning_rate": 5.711574191366427e-06, - "loss": 0.7211, - "step": 629 - }, - { - "epoch": 0.48, - "learning_rate": 5.699063421214794e-06, - "loss": 0.688, - "step": 630 - }, - { - "epoch": 0.48, - "learning_rate": 5.686548185524694e-06, - "loss": 0.7312, - "step": 631 - }, - { - "epoch": 0.48, - "learning_rate": 5.674028564242042e-06, - "loss": 0.6939, - "step": 632 - }, - { - "epoch": 0.48, - "learning_rate": 5.66150463734077e-06, - "loss": 0.7012, - "step": 633 - }, - { - "epoch": 0.48, - "learning_rate": 5.648976484822313e-06, - "loss": 0.6963, - "step": 634 - }, - { - "epoch": 0.49, - "learning_rate": 5.636444186715101e-06, - "loss": 0.7171, - "step": 635 - }, - { - "epoch": 0.49, - "learning_rate": 5.623907823074044e-06, - "loss": 0.7037, - "step": 636 - }, - { - "epoch": 0.49, - "learning_rate": 5.611367473980022e-06, - "loss": 0.713, - "step": 637 - }, - { - "epoch": 0.49, - "learning_rate": 5.598823219539373e-06, - "loss": 0.7142, - "step": 638 - }, - { - "epoch": 0.49, - "learning_rate": 5.5862751398833824e-06, - "loss": 0.7061, - "step": 639 - }, - { - "epoch": 0.49, - "learning_rate": 5.573723315167773e-06, - "loss": 0.6989, - "step": 640 - }, - { - "epoch": 0.49, - "eval_loss": 1.2723302841186523, - "eval_runtime": 126.4363, - "eval_samples_per_second": 94.664, - "eval_steps_per_second": 23.672, - "step": 640 - }, - { - "epoch": 0.49, - "learning_rate": 5.561167825572187e-06, - "loss": 0.6954, - "step": 641 - }, - { - "epoch": 0.49, - "learning_rate": 5.54860875129968e-06, - "loss": 0.7098, - "step": 642 - }, - { - "epoch": 0.49, - "learning_rate": 5.5360461725762045e-06, - "loss": 0.7096, - "step": 643 - }, - { - "epoch": 0.49, - "learning_rate": 5.5234801696500996e-06, - "loss": 0.7219, - "step": 644 - }, - { - "epoch": 0.49, - "learning_rate": 5.51091082279158e-06, - "loss": 0.7174, - "step": 645 - }, - { - "epoch": 0.49, - "learning_rate": 5.498338212292216e-06, - "loss": 0.7046, - "step": 646 - }, - { - "epoch": 0.49, - "learning_rate": 5.48576241846443e-06, - "loss": 0.6943, - "step": 647 - }, - { - "epoch": 0.5, - "learning_rate": 5.473183521640978e-06, - "loss": 0.724, - "step": 648 - }, - { - "epoch": 0.5, - "learning_rate": 5.460601602174437e-06, - "loss": 0.7001, - "step": 649 - }, - { - "epoch": 0.5, - "learning_rate": 5.448016740436693e-06, - "loss": 0.682, - "step": 650 - }, - { - "epoch": 0.5, - "learning_rate": 5.435429016818428e-06, - "loss": 0.6889, - "step": 651 - }, - { - "epoch": 0.5, - "learning_rate": 5.422838511728601e-06, - "loss": 0.7084, - "step": 652 - }, - { - "epoch": 0.5, - "learning_rate": 5.410245305593942e-06, - "loss": 0.7108, - "step": 653 - }, - { - "epoch": 0.5, - "learning_rate": 5.397649478858437e-06, - "loss": 0.7307, - "step": 654 - }, - { - "epoch": 0.5, - "learning_rate": 5.3850511119828095e-06, - "loss": 0.6986, - "step": 655 - }, - { - "epoch": 0.5, - "learning_rate": 5.3724502854440085e-06, - "loss": 0.7042, - "step": 656 - }, - { - "epoch": 0.5, - "learning_rate": 5.3598470797346945e-06, - "loss": 0.6955, - "step": 657 - }, - { - "epoch": 0.5, - "learning_rate": 5.347241575362729e-06, - "loss": 0.7235, - "step": 658 - }, - { - "epoch": 0.5, - "learning_rate": 5.334633852850657e-06, - "loss": 0.7169, - "step": 659 - }, - { - "epoch": 0.5, - "learning_rate": 5.322023992735188e-06, - "loss": 0.7076, - "step": 660 - }, - { - "epoch": 0.51, - "learning_rate": 5.309412075566692e-06, - "loss": 0.7115, - "step": 661 - }, - { - "epoch": 0.51, - "learning_rate": 5.296798181908673e-06, - "loss": 0.7154, - "step": 662 - }, - { - "epoch": 0.51, - "learning_rate": 5.284182392337269e-06, - "loss": 0.7133, - "step": 663 - }, - { - "epoch": 0.51, - "learning_rate": 5.271564787440723e-06, - "loss": 0.6954, - "step": 664 - }, - { - "epoch": 0.51, - "learning_rate": 5.258945447818872e-06, - "loss": 0.6985, - "step": 665 - }, - { - "epoch": 0.51, - "learning_rate": 5.24632445408264e-06, - "loss": 0.6894, - "step": 666 - }, - { - "epoch": 0.51, - "learning_rate": 5.233701886853515e-06, - "loss": 0.6973, - "step": 667 - }, - { - "epoch": 0.51, - "learning_rate": 5.221077826763035e-06, - "loss": 0.719, - "step": 668 - }, - { - "epoch": 0.51, - "learning_rate": 5.208452354452275e-06, - "loss": 0.7113, - "step": 669 - }, - { - "epoch": 0.51, - "learning_rate": 5.1958255505713305e-06, - "loss": 0.6917, - "step": 670 - }, - { - "epoch": 0.51, - "learning_rate": 5.1831974957788075e-06, - "loss": 0.7074, - "step": 671 - }, - { - "epoch": 0.51, - "learning_rate": 5.1705682707412965e-06, - "loss": 0.727, - "step": 672 - }, - { - "epoch": 0.51, - "learning_rate": 5.157937956132866e-06, - "loss": 0.7054, - "step": 673 - }, - { - "epoch": 0.52, - "learning_rate": 5.145306632634544e-06, - "loss": 0.7132, - "step": 674 - }, - { - "epoch": 0.52, - "learning_rate": 5.132674380933803e-06, - "loss": 0.7042, - "step": 675 - }, - { - "epoch": 0.52, - "learning_rate": 5.120041281724048e-06, - "loss": 0.7062, - "step": 676 - }, - { - "epoch": 0.52, - "learning_rate": 5.107407415704094e-06, - "loss": 0.7076, - "step": 677 - }, - { - "epoch": 0.52, - "learning_rate": 5.094772863577655e-06, - "loss": 0.6934, - "step": 678 - }, - { - "epoch": 0.52, - "learning_rate": 5.082137706052826e-06, - "loss": 0.6957, - "step": 679 - }, - { - "epoch": 0.52, - "learning_rate": 5.069502023841576e-06, - "loss": 0.7104, - "step": 680 - }, - { - "epoch": 0.52, - "learning_rate": 5.056865897659219e-06, - "loss": 0.7048, - "step": 681 - }, - { - "epoch": 0.52, - "learning_rate": 5.044229408223906e-06, - "loss": 0.7057, - "step": 682 - }, - { - "epoch": 0.52, - "learning_rate": 5.031592636256111e-06, - "loss": 0.7183, - "step": 683 - }, - { - "epoch": 0.52, - "learning_rate": 5.018955662478109e-06, - "loss": 0.6956, - "step": 684 - }, - { - "epoch": 0.52, - "learning_rate": 5.0063185676134695e-06, - "loss": 0.7088, - "step": 685 - }, - { - "epoch": 0.52, - "learning_rate": 4.993681432386532e-06, - "loss": 0.7059, - "step": 686 - }, - { - "epoch": 0.53, - "learning_rate": 4.981044337521892e-06, - "loss": 0.7231, - "step": 687 - }, - { - "epoch": 0.53, - "learning_rate": 4.968407363743891e-06, - "loss": 0.7042, - "step": 688 - }, - { - "epoch": 0.53, - "learning_rate": 4.955770591776095e-06, - "loss": 0.6963, - "step": 689 - }, - { - "epoch": 0.53, - "learning_rate": 4.943134102340782e-06, - "loss": 0.697, - "step": 690 - }, - { - "epoch": 0.53, - "learning_rate": 4.9304979761584256e-06, - "loss": 0.6979, - "step": 691 - }, - { - "epoch": 0.53, - "learning_rate": 4.9178622939471746e-06, - "loss": 0.6939, - "step": 692 - }, - { - "epoch": 0.53, - "learning_rate": 4.905227136422347e-06, - "loss": 0.7049, - "step": 693 - }, - { - "epoch": 0.53, - "learning_rate": 4.892592584295907e-06, - "loss": 0.7023, - "step": 694 - }, - { - "epoch": 0.53, - "learning_rate": 4.8799587182759526e-06, - "loss": 0.7277, - "step": 695 - }, - { - "epoch": 0.53, - "learning_rate": 4.867325619066198e-06, - "loss": 0.7335, - "step": 696 - }, - { - "epoch": 0.53, - "learning_rate": 4.854693367365457e-06, - "loss": 0.7116, - "step": 697 - }, - { - "epoch": 0.53, - "learning_rate": 4.842062043867136e-06, - "loss": 0.704, - "step": 698 - }, - { - "epoch": 0.53, - "learning_rate": 4.829431729258705e-06, - "loss": 0.7154, - "step": 699 - }, - { - "epoch": 0.54, - "learning_rate": 4.816802504221194e-06, - "loss": 0.6967, - "step": 700 - }, - { - "epoch": 0.54, - "learning_rate": 4.804174449428671e-06, - "loss": 0.7116, - "step": 701 - }, - { - "epoch": 0.54, - "learning_rate": 4.791547645547727e-06, - "loss": 0.713, - "step": 702 - }, - { - "epoch": 0.54, - "learning_rate": 4.778922173236968e-06, - "loss": 0.6949, - "step": 703 - }, - { - "epoch": 0.54, - "learning_rate": 4.7662981131464866e-06, - "loss": 0.6969, - "step": 704 - }, - { - "epoch": 0.54, - "learning_rate": 4.753675545917361e-06, - "loss": 0.6958, - "step": 705 - }, - { - "epoch": 0.54, - "learning_rate": 4.74105455218113e-06, - "loss": 0.7099, - "step": 706 - }, - { - "epoch": 0.54, - "learning_rate": 4.728435212559278e-06, - "loss": 0.6937, - "step": 707 - }, - { - "epoch": 0.54, - "learning_rate": 4.715817607662731e-06, - "loss": 0.7165, - "step": 708 - }, - { - "epoch": 0.54, - "learning_rate": 4.703201818091326e-06, - "loss": 0.711, - "step": 709 - }, - { - "epoch": 0.54, - "learning_rate": 4.690587924433309e-06, - "loss": 0.6951, - "step": 710 - }, - { - "epoch": 0.54, - "learning_rate": 4.677976007264812e-06, - "loss": 0.7308, - "step": 711 - }, - { - "epoch": 0.54, - "learning_rate": 4.665366147149343e-06, - "loss": 0.693, - "step": 712 - }, - { - "epoch": 0.55, - "learning_rate": 4.652758424637271e-06, - "loss": 0.6987, - "step": 713 - }, - { - "epoch": 0.55, - "learning_rate": 4.640152920265306e-06, - "loss": 0.7145, - "step": 714 - }, - { - "epoch": 0.55, - "learning_rate": 4.627549714555993e-06, - "loss": 0.7104, - "step": 715 - }, - { - "epoch": 0.55, - "learning_rate": 4.614948888017191e-06, - "loss": 0.7041, - "step": 716 - }, - { - "epoch": 0.55, - "learning_rate": 4.6023505211415635e-06, - "loss": 0.6999, - "step": 717 - }, - { - "epoch": 0.55, - "learning_rate": 4.589754694406058e-06, - "loss": 0.7177, - "step": 718 - }, - { - "epoch": 0.55, - "learning_rate": 4.577161488271401e-06, - "loss": 0.707, - "step": 719 - }, - { - "epoch": 0.55, - "learning_rate": 4.564570983181573e-06, - "loss": 0.6847, - "step": 720 - }, - { - "epoch": 0.55, - "learning_rate": 4.5519832595633076e-06, - "loss": 0.6904, - "step": 721 - }, - { - "epoch": 0.55, - "learning_rate": 4.539398397825563e-06, - "loss": 0.7115, - "step": 722 - }, - { - "epoch": 0.55, - "learning_rate": 4.526816478359023e-06, - "loss": 0.7155, - "step": 723 - }, - { - "epoch": 0.55, - "learning_rate": 4.514237581535571e-06, - "loss": 0.7139, - "step": 724 - }, - { - "epoch": 0.55, - "learning_rate": 4.501661787707784e-06, - "loss": 0.7214, - "step": 725 - }, - { - "epoch": 0.56, - "learning_rate": 4.489089177208421e-06, - "loss": 0.7107, - "step": 726 - }, - { - "epoch": 0.56, - "learning_rate": 4.4765198303499e-06, - "loss": 0.7114, - "step": 727 - }, - { - "epoch": 0.56, - "learning_rate": 4.463953827423796e-06, - "loss": 0.7309, - "step": 728 - }, - { - "epoch": 0.56, - "learning_rate": 4.45139124870032e-06, - "loss": 0.6868, - "step": 729 - }, - { - "epoch": 0.56, - "learning_rate": 4.438832174427815e-06, - "loss": 0.7026, - "step": 730 - }, - { - "epoch": 0.56, - "learning_rate": 4.426276684832229e-06, - "loss": 0.695, - "step": 731 - }, - { - "epoch": 0.56, - "learning_rate": 4.413724860116619e-06, - "loss": 0.7076, - "step": 732 - }, - { - "epoch": 0.56, - "learning_rate": 4.40117678046063e-06, - "loss": 0.7125, - "step": 733 - }, - { - "epoch": 0.56, - "learning_rate": 4.388632526019981e-06, - "loss": 0.7007, - "step": 734 - }, - { - "epoch": 0.56, - "learning_rate": 4.3760921769259585e-06, - "loss": 0.6994, - "step": 735 - }, - { - "epoch": 0.56, - "learning_rate": 4.363555813284902e-06, - "loss": 0.7028, - "step": 736 - }, - { - "epoch": 0.56, - "learning_rate": 4.351023515177688e-06, - "loss": 0.7047, - "step": 737 - }, - { - "epoch": 0.56, - "learning_rate": 4.338495362659233e-06, - "loss": 0.7143, - "step": 738 - }, - { - "epoch": 0.57, - "learning_rate": 4.325971435757961e-06, - "loss": 0.7092, - "step": 739 - }, - { - "epoch": 0.57, - "learning_rate": 4.3134518144753085e-06, - "loss": 0.7156, - "step": 740 - }, - { - "epoch": 0.57, - "learning_rate": 4.300936578785207e-06, - "loss": 0.7018, - "step": 741 - }, - { - "epoch": 0.57, - "learning_rate": 4.2884258086335755e-06, - "loss": 0.6915, - "step": 742 - }, - { - "epoch": 0.57, - "learning_rate": 4.275919583937805e-06, - "loss": 0.6891, - "step": 743 - }, - { - "epoch": 0.57, - "learning_rate": 4.263417984586251e-06, - "loss": 0.7028, - "step": 744 - }, - { - "epoch": 0.57, - "learning_rate": 4.250921090437723e-06, - "loss": 0.71, - "step": 745 - }, - { - "epoch": 0.57, - "learning_rate": 4.2384289813209754e-06, - "loss": 0.6996, - "step": 746 - }, - { - "epoch": 0.57, - "learning_rate": 4.225941737034196e-06, - "loss": 0.7116, - "step": 747 - }, - { - "epoch": 0.57, - "learning_rate": 4.213459437344496e-06, - "loss": 0.7016, - "step": 748 - }, - { - "epoch": 0.57, - "learning_rate": 4.2009821619874e-06, - "loss": 0.7103, - "step": 749 - }, - { - "epoch": 0.57, - "learning_rate": 4.188509990666339e-06, - "loss": 0.6894, - "step": 750 - }, - { - "epoch": 0.57, - "learning_rate": 4.1760430030521405e-06, - "loss": 0.7042, - "step": 751 - }, - { - "epoch": 0.58, - "learning_rate": 4.163581278782519e-06, - "loss": 0.7124, - "step": 752 - }, - { - "epoch": 0.58, - "learning_rate": 4.151124897461567e-06, - "loss": 0.6786, - "step": 753 - }, - { - "epoch": 0.58, - "learning_rate": 4.138673938659245e-06, - "loss": 0.7049, - "step": 754 - }, - { - "epoch": 0.58, - "learning_rate": 4.126228481910877e-06, - "loss": 0.7045, - "step": 755 - }, - { - "epoch": 0.58, - "learning_rate": 4.113788606716641e-06, - "loss": 0.677, - "step": 756 - }, - { - "epoch": 0.58, - "learning_rate": 4.101354392541061e-06, - "loss": 0.6974, - "step": 757 - }, - { - "epoch": 0.58, - "learning_rate": 4.088925918812496e-06, - "loss": 0.7318, - "step": 758 - }, - { - "epoch": 0.58, - "learning_rate": 4.076503264922637e-06, - "loss": 0.7107, - "step": 759 - }, - { - "epoch": 0.58, - "learning_rate": 4.064086510226002e-06, - "loss": 0.6988, - "step": 760 - }, - { - "epoch": 0.58, - "learning_rate": 4.05167573403942e-06, - "loss": 0.6958, - "step": 761 - }, - { - "epoch": 0.58, - "learning_rate": 4.039271015641535e-06, - "loss": 0.6882, - "step": 762 - }, - { - "epoch": 0.58, - "learning_rate": 4.026872434272291e-06, - "loss": 0.7055, - "step": 763 - }, - { - "epoch": 0.58, - "learning_rate": 4.014480069132428e-06, - "loss": 0.7141, - "step": 764 - }, - { - "epoch": 0.59, - "learning_rate": 4.002093999382985e-06, - "loss": 0.7035, - "step": 765 - }, - { - "epoch": 0.59, - "learning_rate": 3.9897143041447794e-06, - "loss": 0.7042, - "step": 766 - }, - { - "epoch": 0.59, - "learning_rate": 3.977341062497911e-06, - "loss": 0.6956, - "step": 767 - }, - { - "epoch": 0.59, - "learning_rate": 3.964974353481254e-06, - "loss": 0.6955, - "step": 768 - }, - { - "epoch": 0.59, - "eval_loss": 1.2692409753799438, - "eval_runtime": 126.0431, - "eval_samples_per_second": 94.96, - "eval_steps_per_second": 23.746, - "step": 768 - }, - { - "epoch": 0.59, - "learning_rate": 3.9526142560919566e-06, - "loss": 0.6964, - "step": 769 - }, - { - "epoch": 0.59, - "learning_rate": 3.940260849284928e-06, - "loss": 0.6884, - "step": 770 - }, - { - "epoch": 0.59, - "learning_rate": 3.927914211972342e-06, - "loss": 0.6978, - "step": 771 - }, - { - "epoch": 0.59, - "learning_rate": 3.915574423023128e-06, - "loss": 0.7013, - "step": 772 - }, - { - "epoch": 0.59, - "learning_rate": 3.903241561262468e-06, - "loss": 0.7124, - "step": 773 - }, - { - "epoch": 0.59, - "learning_rate": 3.890915705471297e-06, - "loss": 0.7024, - "step": 774 - }, - { - "epoch": 0.59, - "learning_rate": 3.878596934385794e-06, - "loss": 0.7031, - "step": 775 - }, - { - "epoch": 0.59, - "learning_rate": 3.866285326696883e-06, - "loss": 0.7138, - "step": 776 - }, - { - "epoch": 0.59, - "learning_rate": 3.853980961049727e-06, - "loss": 0.7161, - "step": 777 - }, - { - "epoch": 0.6, - "learning_rate": 3.841683916043229e-06, - "loss": 0.7032, - "step": 778 - }, - { - "epoch": 0.6, - "learning_rate": 3.829394270229531e-06, - "loss": 0.7102, - "step": 779 - }, - { - "epoch": 0.6, - "learning_rate": 3.817112102113506e-06, - "loss": 0.6979, - "step": 780 - }, - { - "epoch": 0.6, - "learning_rate": 3.804837490152262e-06, - "loss": 0.683, - "step": 781 - }, - { - "epoch": 0.6, - "learning_rate": 3.7925705127546387e-06, - "loss": 0.6864, - "step": 782 - }, - { - "epoch": 0.6, - "learning_rate": 3.780311248280708e-06, - "loss": 0.7354, - "step": 783 - }, - { - "epoch": 0.6, - "learning_rate": 3.7680597750412715e-06, - "loss": 0.6988, - "step": 784 - }, - { - "epoch": 0.6, - "learning_rate": 3.755816171297362e-06, - "loss": 0.689, - "step": 785 - }, - { - "epoch": 0.6, - "learning_rate": 3.7435805152597416e-06, - "loss": 0.696, - "step": 786 - }, - { - "epoch": 0.6, - "learning_rate": 3.7313528850884028e-06, - "loss": 0.7027, - "step": 787 - }, - { - "epoch": 0.6, - "learning_rate": 3.7191333588920745e-06, - "loss": 0.687, - "step": 788 - }, - { - "epoch": 0.6, - "learning_rate": 3.706922014727714e-06, - "loss": 0.7056, - "step": 789 - }, - { - "epoch": 0.6, - "learning_rate": 3.694718930600012e-06, - "loss": 0.7121, - "step": 790 - }, - { - "epoch": 0.61, - "learning_rate": 3.6825241844608973e-06, - "loss": 0.6855, - "step": 791 - }, - { - "epoch": 0.61, - "learning_rate": 3.6703378542090373e-06, - "loss": 0.7134, - "step": 792 - }, - { - "epoch": 0.61, - "learning_rate": 3.658160017689337e-06, - "loss": 0.7107, - "step": 793 - }, - { - "epoch": 0.61, - "learning_rate": 3.6459907526924467e-06, - "loss": 0.709, - "step": 794 - }, - { - "epoch": 0.61, - "learning_rate": 3.6338301369542604e-06, - "loss": 0.7103, - "step": 795 - }, - { - "epoch": 0.61, - "learning_rate": 3.6216782481554236e-06, - "loss": 0.7255, - "step": 796 - }, - { - "epoch": 0.61, - "learning_rate": 3.609535163920836e-06, - "loss": 0.703, - "step": 797 - }, - { - "epoch": 0.61, - "learning_rate": 3.5974009618191512e-06, - "loss": 0.6917, - "step": 798 - }, - { - "epoch": 0.61, - "learning_rate": 3.5852757193622878e-06, - "loss": 0.7056, - "step": 799 - }, - { - "epoch": 0.61, - "learning_rate": 3.5731595140049296e-06, - "loss": 0.6945, - "step": 800 - }, - { - "epoch": 0.61, - "learning_rate": 3.5610524231440324e-06, - "loss": 0.6958, - "step": 801 - }, - { - "epoch": 0.61, - "learning_rate": 3.5489545241183322e-06, - "loss": 0.698, - "step": 802 - }, - { - "epoch": 0.61, - "learning_rate": 3.536865894207845e-06, - "loss": 0.6973, - "step": 803 - }, - { - "epoch": 0.62, - "learning_rate": 3.5247866106333795e-06, - "loss": 0.7151, - "step": 804 - }, - { - "epoch": 0.62, - "learning_rate": 3.512716750556037e-06, - "loss": 0.7113, - "step": 805 - }, - { - "epoch": 0.62, - "learning_rate": 3.50065639107673e-06, - "loss": 0.7088, - "step": 806 - }, - { - "epoch": 0.62, - "learning_rate": 3.488605609235674e-06, - "loss": 0.6899, - "step": 807 - }, - { - "epoch": 0.62, - "learning_rate": 3.4765644820119093e-06, - "loss": 0.7063, - "step": 808 - }, - { - "epoch": 0.62, - "learning_rate": 3.464533086322801e-06, - "loss": 0.6992, - "step": 809 - }, - { - "epoch": 0.62, - "learning_rate": 3.45251149902355e-06, - "loss": 0.7083, - "step": 810 - }, - { - "epoch": 0.62, - "learning_rate": 3.4404997969067043e-06, - "loss": 0.7025, - "step": 811 - }, - { - "epoch": 0.62, - "learning_rate": 3.428498056701665e-06, - "loss": 0.7017, - "step": 812 - }, - { - "epoch": 0.62, - "learning_rate": 3.4165063550741985e-06, - "loss": 0.7221, - "step": 813 - }, - { - "epoch": 0.62, - "learning_rate": 3.4045247686259436e-06, - "loss": 0.6963, - "step": 814 - }, - { - "epoch": 0.62, - "learning_rate": 3.3925533738939286e-06, - "loss": 0.7003, - "step": 815 - }, - { - "epoch": 0.62, - "learning_rate": 3.380592247350075e-06, - "loss": 0.6958, - "step": 816 - }, - { - "epoch": 0.62, - "learning_rate": 3.368641465400713e-06, - "loss": 0.6989, - "step": 817 - }, - { - "epoch": 0.63, - "learning_rate": 3.356701104386092e-06, - "loss": 0.7141, - "step": 818 - }, - { - "epoch": 0.63, - "learning_rate": 3.344771240579897e-06, - "loss": 0.709, - "step": 819 - }, - { - "epoch": 0.63, - "learning_rate": 3.3328519501887535e-06, - "loss": 0.6989, - "step": 820 - }, - { - "epoch": 0.63, - "learning_rate": 3.320943309351746e-06, - "loss": 0.6851, - "step": 821 - }, - { - "epoch": 0.63, - "learning_rate": 3.3090453941399346e-06, - "loss": 0.7048, - "step": 822 - }, - { - "epoch": 0.63, - "learning_rate": 3.2971582805558622e-06, - "loss": 0.7083, - "step": 823 - }, - { - "epoch": 0.63, - "learning_rate": 3.285282044533072e-06, - "loss": 0.7161, - "step": 824 - }, - { - "epoch": 0.63, - "learning_rate": 3.2734167619356237e-06, - "loss": 0.6925, - "step": 825 - }, - { - "epoch": 0.63, - "learning_rate": 3.2615625085576062e-06, - "loss": 0.6981, - "step": 826 - }, - { - "epoch": 0.63, - "learning_rate": 3.249719360122659e-06, - "loss": 0.7107, - "step": 827 - }, - { - "epoch": 0.63, - "learning_rate": 3.23788739228348e-06, - "loss": 0.7204, - "step": 828 - }, - { - "epoch": 0.63, - "learning_rate": 3.226066680621347e-06, - "loss": 0.6901, - "step": 829 - }, - { - "epoch": 0.63, - "learning_rate": 3.2142573006456377e-06, - "loss": 0.7023, - "step": 830 - }, - { - "epoch": 0.64, - "learning_rate": 3.20245932779334e-06, - "loss": 0.709, - "step": 831 - }, - { - "epoch": 0.64, - "learning_rate": 3.190672837428581e-06, - "loss": 0.7008, - "step": 832 - }, - { - "epoch": 0.64, - "learning_rate": 3.178897904842132e-06, - "loss": 0.705, - "step": 833 - }, - { - "epoch": 0.64, - "learning_rate": 3.167134605250938e-06, - "loss": 0.7034, - "step": 834 - }, - { - "epoch": 0.64, - "learning_rate": 3.155383013797631e-06, - "loss": 0.6976, - "step": 835 - }, - { - "epoch": 0.64, - "learning_rate": 3.1436432055500583e-06, - "loss": 0.7154, - "step": 836 - }, - { - "epoch": 0.64, - "learning_rate": 3.131915255500792e-06, - "loss": 0.7226, - "step": 837 - }, - { - "epoch": 0.64, - "learning_rate": 3.120199238566658e-06, - "loss": 0.7073, - "step": 838 - }, - { - "epoch": 0.64, - "learning_rate": 3.1084952295882533e-06, - "loss": 0.7182, - "step": 839 - }, - { - "epoch": 0.64, - "learning_rate": 3.096803303329469e-06, - "loss": 0.7134, - "step": 840 - }, - { - "epoch": 0.64, - "learning_rate": 3.0851235344770176e-06, - "loss": 0.6802, - "step": 841 - }, - { - "epoch": 0.64, - "learning_rate": 3.073455997639946e-06, - "loss": 0.6957, - "step": 842 - }, - { - "epoch": 0.64, - "learning_rate": 3.0618007673491673e-06, - "loss": 0.7119, - "step": 843 - }, - { - "epoch": 0.65, - "learning_rate": 3.0501579180569795e-06, - "loss": 0.7178, - "step": 844 - }, - { - "epoch": 0.65, - "learning_rate": 3.0385275241365965e-06, - "loss": 0.6878, - "step": 845 - }, - { - "epoch": 0.65, - "learning_rate": 3.0269096598816636e-06, - "loss": 0.6955, - "step": 846 - }, - { - "epoch": 0.65, - "learning_rate": 3.015304399505791e-06, - "loss": 0.697, - "step": 847 - }, - { - "epoch": 0.65, - "learning_rate": 3.0037118171420764e-06, - "loss": 0.7147, - "step": 848 - }, - { - "epoch": 0.65, - "learning_rate": 2.9921319868426304e-06, - "loss": 0.6789, - "step": 849 - }, - { - "epoch": 0.65, - "learning_rate": 2.9805649825781073e-06, - "loss": 0.6975, - "step": 850 - }, - { - "epoch": 0.65, - "learning_rate": 2.9690108782372282e-06, - "loss": 0.6798, - "step": 851 - }, - { - "epoch": 0.65, - "learning_rate": 2.957469747626311e-06, - "loss": 0.6772, - "step": 852 - }, - { - "epoch": 0.65, - "learning_rate": 2.945941664468799e-06, - "loss": 0.7134, - "step": 853 - }, - { - "epoch": 0.65, - "learning_rate": 2.9344267024047906e-06, - "loss": 0.6944, - "step": 854 - }, - { - "epoch": 0.65, - "learning_rate": 2.9229249349905686e-06, - "loss": 0.6855, - "step": 855 - }, - { - "epoch": 0.65, - "learning_rate": 2.9114364356981274e-06, - "loss": 0.7047, - "step": 856 - }, - { - "epoch": 0.66, - "learning_rate": 2.8999612779147086e-06, - "loss": 0.7053, - "step": 857 - }, - { - "epoch": 0.66, - "learning_rate": 2.8884995349423286e-06, - "loss": 0.6965, - "step": 858 - }, - { - "epoch": 0.66, - "learning_rate": 2.877051279997312e-06, - "loss": 0.7024, - "step": 859 - }, - { - "epoch": 0.66, - "learning_rate": 2.8656165862098227e-06, - "loss": 0.693, - "step": 860 - }, - { - "epoch": 0.66, - "learning_rate": 2.854195526623399e-06, - "loss": 0.6933, - "step": 861 - }, - { - "epoch": 0.66, - "learning_rate": 2.8427881741944824e-06, - "loss": 0.6949, - "step": 862 - }, - { - "epoch": 0.66, - "learning_rate": 2.8313946017919575e-06, - "loss": 0.7189, - "step": 863 - }, - { - "epoch": 0.66, - "learning_rate": 2.820014882196683e-06, - "loss": 0.696, - "step": 864 - }, - { - "epoch": 0.66, - "learning_rate": 2.808649088101024e-06, - "loss": 0.7092, - "step": 865 - }, - { - "epoch": 0.66, - "learning_rate": 2.7972972921083997e-06, - "loss": 0.7002, - "step": 866 - }, - { - "epoch": 0.66, - "learning_rate": 2.7859595667328027e-06, - "loss": 0.709, - "step": 867 - }, - { - "epoch": 0.66, - "learning_rate": 2.7746359843983466e-06, - "loss": 0.7134, - "step": 868 - }, - { - "epoch": 0.66, - "learning_rate": 2.7633266174388018e-06, - "loss": 0.6943, - "step": 869 - }, - { - "epoch": 0.67, - "learning_rate": 2.7520315380971313e-06, - "loss": 0.6965, - "step": 870 - }, - { - "epoch": 0.67, - "learning_rate": 2.740750818525031e-06, - "loss": 0.7196, - "step": 871 - }, - { - "epoch": 0.67, - "learning_rate": 2.729484530782468e-06, - "loss": 0.7176, - "step": 872 - }, - { - "epoch": 0.67, - "learning_rate": 2.7182327468372206e-06, - "loss": 0.6975, - "step": 873 - }, - { - "epoch": 0.67, - "learning_rate": 2.7069955385644164e-06, - "loss": 0.7052, - "step": 874 - }, - { - "epoch": 0.67, - "learning_rate": 2.6957729777460817e-06, - "loss": 0.7142, - "step": 875 - }, - { - "epoch": 0.67, - "learning_rate": 2.6845651360706703e-06, - "loss": 0.7064, - "step": 876 - }, - { - "epoch": 0.67, - "learning_rate": 2.6733720851326127e-06, - "loss": 0.7057, - "step": 877 - }, - { - "epoch": 0.67, - "learning_rate": 2.6621938964318593e-06, - "loss": 0.715, - "step": 878 - }, - { - "epoch": 0.67, - "learning_rate": 2.6510306413734223e-06, - "loss": 0.6897, - "step": 879 - }, - { - "epoch": 0.67, - "learning_rate": 2.6398823912669175e-06, - "loss": 0.6922, - "step": 880 - }, - { - "epoch": 0.67, - "learning_rate": 2.6287492173261132e-06, - "loss": 0.7188, - "step": 881 - }, - { - "epoch": 0.67, - "learning_rate": 2.6176311906684704e-06, - "loss": 0.706, - "step": 882 - }, - { - "epoch": 0.68, - "learning_rate": 2.6065283823146916e-06, - "loss": 0.6942, - "step": 883 - }, - { - "epoch": 0.68, - "learning_rate": 2.59544086318827e-06, - "loss": 0.7151, - "step": 884 - }, - { - "epoch": 0.68, - "learning_rate": 2.5843687041150294e-06, - "loss": 0.7047, - "step": 885 - }, - { - "epoch": 0.68, - "learning_rate": 2.5733119758226756e-06, - "loss": 0.6886, - "step": 886 - }, - { - "epoch": 0.68, - "learning_rate": 2.5622707489403436e-06, - "loss": 0.7075, - "step": 887 - }, - { - "epoch": 0.68, - "learning_rate": 2.5512450939981494e-06, - "loss": 0.7002, - "step": 888 - }, - { - "epoch": 0.68, - "learning_rate": 2.5402350814267364e-06, - "loss": 0.7007, - "step": 889 - }, - { - "epoch": 0.68, - "learning_rate": 2.529240781556824e-06, - "loss": 0.7147, - "step": 890 - }, - { - "epoch": 0.68, - "learning_rate": 2.5182622646187648e-06, - "loss": 0.7103, - "step": 891 - }, - { - "epoch": 0.68, - "learning_rate": 2.5072996007420868e-06, - "loss": 0.6987, - "step": 892 - }, - { - "epoch": 0.68, - "learning_rate": 2.496352859955057e-06, - "loss": 0.7111, - "step": 893 - }, - { - "epoch": 0.68, - "learning_rate": 2.4854221121842213e-06, - "loss": 0.7049, - "step": 894 - }, - { - "epoch": 0.68, - "learning_rate": 2.4745074272539677e-06, - "loss": 0.683, - "step": 895 - }, - { - "epoch": 0.69, - "learning_rate": 2.4636088748860738e-06, - "loss": 0.7179, - "step": 896 - }, - { - "epoch": 0.69, - "eval_loss": 1.2681297063827515, - "eval_runtime": 126.6345, - "eval_samples_per_second": 94.516, - "eval_steps_per_second": 23.635, - "step": 896 - }, - { - "epoch": 0.69, - "learning_rate": 2.4527265246992667e-06, - "loss": 0.704, - "step": 897 - }, - { - "epoch": 0.69, - "learning_rate": 2.4418604462087743e-06, - "loss": 0.6825, - "step": 898 - }, - { - "epoch": 0.69, - "learning_rate": 2.4310107088258834e-06, - "loss": 0.7164, - "step": 899 - }, - { - "epoch": 0.69, - "learning_rate": 2.4201773818574956e-06, - "loss": 0.6916, - "step": 900 - }, - { - "epoch": 0.69, - "learning_rate": 2.4093605345056833e-06, - "loss": 0.7176, - "step": 901 - }, - { - "epoch": 0.69, - "learning_rate": 2.398560235867252e-06, - "loss": 0.7117, - "step": 902 - }, - { - "epoch": 0.69, - "learning_rate": 2.3877765549332936e-06, - "loss": 0.6892, - "step": 903 - }, - { - "epoch": 0.69, - "learning_rate": 2.3770095605887487e-06, - "loss": 0.7112, - "step": 904 - }, - { - "epoch": 0.69, - "learning_rate": 2.366259321611965e-06, - "loss": 0.7141, - "step": 905 - }, - { - "epoch": 0.69, - "learning_rate": 2.3555259066742593e-06, - "loss": 0.6946, - "step": 906 - }, - { - "epoch": 0.69, - "learning_rate": 2.3448093843394782e-06, - "loss": 0.6983, - "step": 907 - }, - { - "epoch": 0.69, - "learning_rate": 2.334109823063562e-06, - "loss": 0.7059, - "step": 908 - }, - { - "epoch": 0.7, - "learning_rate": 2.3234272911940998e-06, - "loss": 0.6979, - "step": 909 - }, - { - "epoch": 0.7, - "learning_rate": 2.312761856969908e-06, - "loss": 0.694, - "step": 910 - }, - { - "epoch": 0.7, - "learning_rate": 2.302113588520578e-06, - "loss": 0.6865, - "step": 911 - }, - { - "epoch": 0.7, - "learning_rate": 2.2914825538660507e-06, - "loss": 0.6993, - "step": 912 - }, - { - "epoch": 0.7, - "learning_rate": 2.2808688209161797e-06, - "loss": 0.697, - "step": 913 - }, - { - "epoch": 0.7, - "learning_rate": 2.2702724574702956e-06, - "loss": 0.7274, - "step": 914 - }, - { - "epoch": 0.7, - "learning_rate": 2.2596935312167766e-06, - "loss": 0.676, - "step": 915 - }, - { - "epoch": 0.7, - "learning_rate": 2.2491321097326125e-06, - "loss": 0.7063, - "step": 916 - }, - { - "epoch": 0.7, - "learning_rate": 2.2385882604829763e-06, - "loss": 0.6942, - "step": 917 - }, - { - "epoch": 0.7, - "learning_rate": 2.2280620508207873e-06, - "loss": 0.7099, - "step": 918 - }, - { - "epoch": 0.7, - "learning_rate": 2.2175535479862927e-06, - "loss": 0.7418, - "step": 919 - }, - { - "epoch": 0.7, - "learning_rate": 2.207062819106624e-06, - "loss": 0.7174, - "step": 920 - }, - { - "epoch": 0.7, - "learning_rate": 2.1965899311953777e-06, - "loss": 0.7083, - "step": 921 - }, - { - "epoch": 0.71, - "learning_rate": 2.1861349511521817e-06, - "loss": 0.684, - "step": 922 - }, - { - "epoch": 0.71, - "learning_rate": 2.1756979457622736e-06, - "loss": 0.7072, - "step": 923 - }, - { - "epoch": 0.71, - "learning_rate": 2.1652789816960677e-06, - "loss": 0.7316, - "step": 924 - }, - { - "epoch": 0.71, - "learning_rate": 2.1548781255087353e-06, - "loss": 0.6925, - "step": 925 - }, - { - "epoch": 0.71, - "learning_rate": 2.144495443639775e-06, - "loss": 0.7069, - "step": 926 - }, - { - "epoch": 0.71, - "learning_rate": 2.134131002412588e-06, - "loss": 0.7018, - "step": 927 - }, - { - "epoch": 0.71, - "learning_rate": 2.1237848680340635e-06, - "loss": 0.6931, - "step": 928 - }, - { - "epoch": 0.71, - "learning_rate": 2.1134571065941396e-06, - "loss": 0.6855, - "step": 929 - }, - { - "epoch": 0.71, - "learning_rate": 2.103147784065396e-06, - "loss": 0.7055, - "step": 930 - }, - { - "epoch": 0.71, - "learning_rate": 2.092856966302624e-06, - "loss": 0.7017, - "step": 931 - }, - { - "epoch": 0.71, - "learning_rate": 2.0825847190424096e-06, - "loss": 0.7027, - "step": 932 - }, - { - "epoch": 0.71, - "learning_rate": 2.072331107902713e-06, - "loss": 0.7046, - "step": 933 - }, - { - "epoch": 0.71, - "learning_rate": 2.0620961983824478e-06, - "loss": 0.7141, - "step": 934 - }, - { - "epoch": 0.72, - "learning_rate": 2.051880055861064e-06, - "loss": 0.6972, - "step": 935 - }, - { - "epoch": 0.72, - "learning_rate": 2.041682745598129e-06, - "loss": 0.6954, - "step": 936 - }, - { - "epoch": 0.72, - "learning_rate": 2.0315043327329164e-06, - "loss": 0.7176, - "step": 937 - }, - { - "epoch": 0.72, - "learning_rate": 2.0213448822839813e-06, - "loss": 0.6966, - "step": 938 - }, - { - "epoch": 0.72, - "learning_rate": 2.0112044591487497e-06, - "loss": 0.6961, - "step": 939 - }, - { - "epoch": 0.72, - "learning_rate": 2.0010831281031025e-06, - "loss": 0.7034, - "step": 940 - }, - { - "epoch": 0.72, - "learning_rate": 1.9909809538009644e-06, - "loss": 0.6919, - "step": 941 - }, - { - "epoch": 0.72, - "learning_rate": 1.9808980007738877e-06, - "loss": 0.7084, - "step": 942 - }, - { - "epoch": 0.72, - "learning_rate": 1.970834333430642e-06, - "loss": 0.6908, - "step": 943 - }, - { - "epoch": 0.72, - "learning_rate": 1.960790016056801e-06, - "loss": 0.727, - "step": 944 - }, - { - "epoch": 0.72, - "learning_rate": 1.950765112814333e-06, - "loss": 0.6916, - "step": 945 - }, - { - "epoch": 0.72, - "learning_rate": 1.940759687741195e-06, - "loss": 0.713, - "step": 946 - }, - { - "epoch": 0.72, - "learning_rate": 1.9307738047509147e-06, - "loss": 0.7095, - "step": 947 - }, - { - "epoch": 0.73, - "learning_rate": 1.9208075276321887e-06, - "loss": 0.6981, - "step": 948 - }, - { - "epoch": 0.73, - "learning_rate": 1.910860920048474e-06, - "loss": 0.7, - "step": 949 - }, - { - "epoch": 0.73, - "learning_rate": 1.9009340455375803e-06, - "loss": 0.7072, - "step": 950 - }, - { - "epoch": 0.73, - "learning_rate": 1.8910269675112651e-06, - "loss": 0.7065, - "step": 951 - }, - { - "epoch": 0.73, - "learning_rate": 1.8811397492548283e-06, - "loss": 0.6972, - "step": 952 - }, - { - "epoch": 0.73, - "learning_rate": 1.8712724539267069e-06, - "loss": 0.7006, - "step": 953 - }, - { - "epoch": 0.73, - "learning_rate": 1.8614251445580717e-06, - "loss": 0.6966, - "step": 954 - }, - { - "epoch": 0.73, - "learning_rate": 1.8515978840524302e-06, - "loss": 0.6852, - "step": 955 - }, - { - "epoch": 0.73, - "learning_rate": 1.8417907351852148e-06, - "loss": 0.7103, - "step": 956 - }, - { - "epoch": 0.73, - "learning_rate": 1.832003760603389e-06, - "loss": 0.7124, - "step": 957 - }, - { - "epoch": 0.73, - "learning_rate": 1.8222370228250452e-06, - "loss": 0.7167, - "step": 958 - }, - { - "epoch": 0.73, - "learning_rate": 1.8124905842390051e-06, - "loss": 0.7122, - "step": 959 - }, - { - "epoch": 0.73, - "learning_rate": 1.8027645071044214e-06, - "loss": 0.7031, - "step": 960 - }, - { - "epoch": 0.74, - "learning_rate": 1.7930588535503812e-06, - "loss": 0.7198, - "step": 961 - }, - { - "epoch": 0.74, - "learning_rate": 1.7833736855755063e-06, - "loss": 0.6989, - "step": 962 - }, - { - "epoch": 0.74, - "learning_rate": 1.7737090650475586e-06, - "loss": 0.725, - "step": 963 - }, - { - "epoch": 0.74, - "learning_rate": 1.7640650537030496e-06, - "loss": 0.7158, - "step": 964 - }, - { - "epoch": 0.74, - "learning_rate": 1.7544417131468373e-06, - "loss": 0.7008, - "step": 965 - }, - { - "epoch": 0.74, - "learning_rate": 1.7448391048517378e-06, - "loss": 0.6996, - "step": 966 - }, - { - "epoch": 0.74, - "learning_rate": 1.7352572901581322e-06, - "loss": 0.7044, - "step": 967 - }, - { - "epoch": 0.74, - "learning_rate": 1.7256963302735752e-06, - "loss": 0.7096, - "step": 968 - }, - { - "epoch": 0.74, - "learning_rate": 1.7161562862724018e-06, - "loss": 0.6957, - "step": 969 - }, - { - "epoch": 0.74, - "learning_rate": 1.70663721909534e-06, - "loss": 0.7023, - "step": 970 - }, - { - "epoch": 0.74, - "learning_rate": 1.6971391895491202e-06, - "loss": 0.6808, - "step": 971 - }, - { - "epoch": 0.74, - "learning_rate": 1.6876622583060854e-06, - "loss": 0.7144, - "step": 972 - }, - { - "epoch": 0.74, - "learning_rate": 1.678206485903805e-06, - "loss": 0.7106, - "step": 973 - }, - { - "epoch": 0.75, - "learning_rate": 1.6687719327446917e-06, - "loss": 0.6884, - "step": 974 - }, - { - "epoch": 0.75, - "learning_rate": 1.6593586590956084e-06, - "loss": 0.6937, - "step": 975 - }, - { - "epoch": 0.75, - "learning_rate": 1.6499667250874873e-06, - "loss": 0.7006, - "step": 976 - }, - { - "epoch": 0.75, - "learning_rate": 1.640596190714947e-06, - "loss": 0.7151, - "step": 977 - }, - { - "epoch": 0.75, - "learning_rate": 1.631247115835905e-06, - "loss": 0.7036, - "step": 978 - }, - { - "epoch": 0.75, - "learning_rate": 1.6219195601712011e-06, - "loss": 0.7016, - "step": 979 - }, - { - "epoch": 0.75, - "learning_rate": 1.6126135833042107e-06, - "loss": 0.6869, - "step": 980 - }, - { - "epoch": 0.75, - "learning_rate": 1.603329244680466e-06, - "loss": 0.7031, - "step": 981 - }, - { - "epoch": 0.75, - "learning_rate": 1.594066603607277e-06, - "loss": 0.6865, - "step": 982 - }, - { - "epoch": 0.75, - "learning_rate": 1.584825719253355e-06, - "loss": 0.7079, - "step": 983 - }, - { - "epoch": 0.75, - "learning_rate": 1.5756066506484285e-06, - "loss": 0.6869, - "step": 984 - }, - { - "epoch": 0.75, - "learning_rate": 1.5664094566828696e-06, - "loss": 0.6938, - "step": 985 - }, - { - "epoch": 0.75, - "learning_rate": 1.5572341961073185e-06, - "loss": 0.6961, - "step": 986 - }, - { - "epoch": 0.75, - "learning_rate": 1.5480809275323073e-06, - "loss": 0.6988, - "step": 987 - }, - { - "epoch": 0.76, - "learning_rate": 1.5389497094278861e-06, - "loss": 0.7096, - "step": 988 - }, - { - "epoch": 0.76, - "learning_rate": 1.529840600123248e-06, - "loss": 0.7149, - "step": 989 - }, - { - "epoch": 0.76, - "learning_rate": 1.5207536578063587e-06, - "loss": 0.6976, - "step": 990 - }, - { - "epoch": 0.76, - "learning_rate": 1.5116889405235807e-06, - "loss": 0.6968, - "step": 991 - }, - { - "epoch": 0.76, - "learning_rate": 1.5026465061793122e-06, - "loss": 0.7032, - "step": 992 - }, - { - "epoch": 0.76, - "learning_rate": 1.4936264125356042e-06, - "loss": 0.6903, - "step": 993 - }, - { - "epoch": 0.76, - "learning_rate": 1.4846287172117996e-06, - "loss": 0.695, - "step": 994 - }, - { - "epoch": 0.76, - "learning_rate": 1.4756534776841642e-06, - "loss": 0.7042, - "step": 995 - }, - { - "epoch": 0.76, - "learning_rate": 1.4667007512855174e-06, - "loss": 0.7104, - "step": 996 - }, - { - "epoch": 0.76, - "learning_rate": 1.4577705952048704e-06, - "loss": 0.6902, - "step": 997 - }, - { - "epoch": 0.76, - "learning_rate": 1.4488630664870495e-06, - "loss": 0.7318, - "step": 998 - }, - { - "epoch": 0.76, - "learning_rate": 1.4399782220323515e-06, - "loss": 0.7082, - "step": 999 - }, - { - "epoch": 0.76, - "learning_rate": 1.4311161185961614e-06, - "loss": 0.6879, - "step": 1000 - }, - { - "epoch": 0.77, - "learning_rate": 1.4222768127885977e-06, - "loss": 0.6826, - "step": 1001 - }, - { - "epoch": 0.77, - "learning_rate": 1.4134603610741526e-06, - "loss": 0.6975, - "step": 1002 - }, - { - "epoch": 0.77, - "learning_rate": 1.4046668197713259e-06, - "loss": 0.7011, - "step": 1003 - }, - { - "epoch": 0.77, - "learning_rate": 1.3958962450522711e-06, - "loss": 0.6868, - "step": 1004 - }, - { - "epoch": 0.77, - "learning_rate": 1.3871486929424326e-06, - "loss": 0.7023, - "step": 1005 - }, - { - "epoch": 0.77, - "learning_rate": 1.3784242193201885e-06, - "loss": 0.6896, - "step": 1006 - }, - { - "epoch": 0.77, - "learning_rate": 1.369722879916493e-06, - "loss": 0.724, - "step": 1007 - }, - { - "epoch": 0.77, - "learning_rate": 1.3610447303145258e-06, - "loss": 0.6863, - "step": 1008 - }, - { - "epoch": 0.77, - "learning_rate": 1.352389825949329e-06, - "loss": 0.6964, - "step": 1009 - }, - { - "epoch": 0.77, - "learning_rate": 1.3437582221074574e-06, - "loss": 0.7075, - "step": 1010 - }, - { - "epoch": 0.77, - "learning_rate": 1.3351499739266255e-06, - "loss": 0.7074, - "step": 1011 - }, - { - "epoch": 0.77, - "learning_rate": 1.3265651363953536e-06, - "loss": 0.6977, - "step": 1012 - }, - { - "epoch": 0.77, - "learning_rate": 1.3180037643526184e-06, - "loss": 0.7016, - "step": 1013 - }, - { - "epoch": 0.78, - "learning_rate": 1.3094659124874998e-06, - "loss": 0.718, - "step": 1014 - }, - { - "epoch": 0.78, - "learning_rate": 1.3009516353388357e-06, - "loss": 0.707, - "step": 1015 - }, - { - "epoch": 0.78, - "learning_rate": 1.292460987294869e-06, - "loss": 0.7223, - "step": 1016 - }, - { - "epoch": 0.78, - "learning_rate": 1.2839940225929066e-06, - "loss": 0.7001, - "step": 1017 - }, - { - "epoch": 0.78, - "learning_rate": 1.275550795318965e-06, - "loss": 0.7024, - "step": 1018 - }, - { - "epoch": 0.78, - "learning_rate": 1.2671313594074293e-06, - "loss": 0.6991, - "step": 1019 - }, - { - "epoch": 0.78, - "learning_rate": 1.2587357686407093e-06, - "loss": 0.6992, - "step": 1020 - }, - { - "epoch": 0.78, - "learning_rate": 1.250364076648894e-06, - "loss": 0.7028, - "step": 1021 - }, - { - "epoch": 0.78, - "learning_rate": 1.242016336909409e-06, - "loss": 0.7003, - "step": 1022 - }, - { - "epoch": 0.78, - "learning_rate": 1.2336926027466777e-06, - "loss": 0.7146, - "step": 1023 - }, - { - "epoch": 0.78, - "learning_rate": 1.2253929273317767e-06, - "loss": 0.6865, - "step": 1024 - }, - { - "epoch": 0.78, - "eval_loss": 1.2678072452545166, - "eval_runtime": 126.3097, - "eval_samples_per_second": 94.759, - "eval_steps_per_second": 23.696, - "step": 1024 - }, - { - "epoch": 0.78, - "learning_rate": 1.2171173636820965e-06, - "loss": 0.6948, - "step": 1025 - }, - { - "epoch": 0.78, - "learning_rate": 1.20886596466101e-06, - "loss": 0.6955, - "step": 1026 - }, - { - "epoch": 0.79, - "learning_rate": 1.200638782977524e-06, - "loss": 0.7001, - "step": 1027 - }, - { - "epoch": 0.79, - "learning_rate": 1.1924358711859491e-06, - "loss": 0.6819, - "step": 1028 - }, - { - "epoch": 0.79, - "learning_rate": 1.1842572816855635e-06, - "loss": 0.693, - "step": 1029 - }, - { - "epoch": 0.79, - "learning_rate": 1.1761030667202755e-06, - "loss": 0.6869, - "step": 1030 - }, - { - "epoch": 0.79, - "learning_rate": 1.1679732783782933e-06, - "loss": 0.7293, - "step": 1031 - }, - { - "epoch": 0.79, - "learning_rate": 1.1598679685917901e-06, - "loss": 0.694, - "step": 1032 - }, - { - "epoch": 0.79, - "learning_rate": 1.1517871891365728e-06, - "loss": 0.7244, - "step": 1033 - }, - { - "epoch": 0.79, - "learning_rate": 1.1437309916317507e-06, - "loss": 0.6934, - "step": 1034 - }, - { - "epoch": 0.79, - "learning_rate": 1.1356994275394057e-06, - "loss": 0.7087, - "step": 1035 - }, - { - "epoch": 0.79, - "learning_rate": 1.1276925481642682e-06, - "loss": 0.7021, - "step": 1036 - }, - { - "epoch": 0.79, - "learning_rate": 1.1197104046533814e-06, - "loss": 0.7045, - "step": 1037 - }, - { - "epoch": 0.79, - "learning_rate": 1.11175304799578e-06, - "loss": 0.6913, - "step": 1038 - }, - { - "epoch": 0.79, - "learning_rate": 1.1038205290221621e-06, - "loss": 0.7002, - "step": 1039 - }, - { - "epoch": 0.8, - "learning_rate": 1.0959128984045674e-06, - "loss": 0.6875, - "step": 1040 - }, - { - "epoch": 0.8, - "learning_rate": 1.0880302066560493e-06, - "loss": 0.6966, - "step": 1041 - }, - { - "epoch": 0.8, - "learning_rate": 1.0801725041303567e-06, - "loss": 0.6964, - "step": 1042 - }, - { - "epoch": 0.8, - "learning_rate": 1.0723398410216085e-06, - "loss": 0.7128, - "step": 1043 - }, - { - "epoch": 0.8, - "learning_rate": 1.0645322673639734e-06, - "loss": 0.6982, - "step": 1044 - }, - { - "epoch": 0.8, - "learning_rate": 1.056749833031357e-06, - "loss": 0.7027, - "step": 1045 - }, - { - "epoch": 0.8, - "learning_rate": 1.0489925877370721e-06, - "loss": 0.6903, - "step": 1046 - }, - { - "epoch": 0.8, - "learning_rate": 1.041260581033529e-06, - "loss": 0.711, - "step": 1047 - }, - { - "epoch": 0.8, - "learning_rate": 1.0335538623119168e-06, - "loss": 0.6961, - "step": 1048 - }, - { - "epoch": 0.8, - "learning_rate": 1.0258724808018866e-06, - "loss": 0.6975, - "step": 1049 - }, - { - "epoch": 0.8, - "learning_rate": 1.0182164855712395e-06, - "loss": 0.6867, - "step": 1050 - }, - { - "epoch": 0.8, - "learning_rate": 1.0105859255256112e-06, - "loss": 0.6974, - "step": 1051 - }, - { - "epoch": 0.8, - "learning_rate": 1.002980849408161e-06, - "loss": 0.7059, - "step": 1052 - }, - { - "epoch": 0.81, - "learning_rate": 9.954013057992584e-07, - "loss": 0.696, - "step": 1053 - }, - { - "epoch": 0.81, - "learning_rate": 9.878473431161767e-07, - "loss": 0.6894, - "step": 1054 - }, - { - "epoch": 0.81, - "learning_rate": 9.80319009612779e-07, - "loss": 0.7057, - "step": 1055 - }, - { - "epoch": 0.81, - "learning_rate": 9.728163533792124e-07, - "loss": 0.7003, - "step": 1056 - }, - { - "epoch": 0.81, - "learning_rate": 9.653394223416e-07, - "loss": 0.6881, - "step": 1057 - }, - { - "epoch": 0.81, - "learning_rate": 9.578882642617365e-07, - "loss": 0.7012, - "step": 1058 - }, - { - "epoch": 0.81, - "learning_rate": 9.504629267367809e-07, - "loss": 0.7019, - "step": 1059 - }, - { - "epoch": 0.81, - "learning_rate": 9.430634571989544e-07, - "loss": 0.7019, - "step": 1060 - }, - { - "epoch": 0.81, - "learning_rate": 9.35689902915235e-07, - "loss": 0.7099, - "step": 1061 - }, - { - "epoch": 0.81, - "learning_rate": 9.283423109870565e-07, - "loss": 0.7046, - "step": 1062 - }, - { - "epoch": 0.81, - "learning_rate": 9.210207283500122e-07, - "loss": 0.7215, - "step": 1063 - }, - { - "epoch": 0.81, - "learning_rate": 9.137252017735471e-07, - "loss": 0.7088, - "step": 1064 - }, - { - "epoch": 0.81, - "learning_rate": 9.064557778606631e-07, - "loss": 0.7, - "step": 1065 - }, - { - "epoch": 0.82, - "learning_rate": 8.992125030476229e-07, - "loss": 0.7037, - "step": 1066 - }, - { - "epoch": 0.82, - "learning_rate": 8.919954236036499e-07, - "loss": 0.7102, - "step": 1067 - }, - { - "epoch": 0.82, - "learning_rate": 8.84804585630637e-07, - "loss": 0.6973, - "step": 1068 - }, - { - "epoch": 0.82, - "learning_rate": 8.776400350628467e-07, - "loss": 0.6911, - "step": 1069 - }, - { - "epoch": 0.82, - "learning_rate": 8.705018176666224e-07, - "loss": 0.6951, - "step": 1070 - }, - { - "epoch": 0.82, - "learning_rate": 8.633899790400923e-07, - "loss": 0.6801, - "step": 1071 - }, - { - "epoch": 0.82, - "learning_rate": 8.563045646128832e-07, - "loss": 0.7069, - "step": 1072 - }, - { - "epoch": 0.82, - "learning_rate": 8.492456196458249e-07, - "loss": 0.7039, - "step": 1073 - }, - { - "epoch": 0.82, - "learning_rate": 8.422131892306623e-07, - "loss": 0.7186, - "step": 1074 - }, - { - "epoch": 0.82, - "learning_rate": 8.352073182897702e-07, - "loss": 0.7145, - "step": 1075 - }, - { - "epoch": 0.82, - "learning_rate": 8.282280515758639e-07, - "loss": 0.7132, - "step": 1076 - }, - { - "epoch": 0.82, - "learning_rate": 8.212754336717138e-07, - "loss": 0.6908, - "step": 1077 - }, - { - "epoch": 0.82, - "learning_rate": 8.143495089898607e-07, - "loss": 0.7016, - "step": 1078 - }, - { - "epoch": 0.83, - "learning_rate": 8.074503217723312e-07, - "loss": 0.6983, - "step": 1079 - }, - { - "epoch": 0.83, - "learning_rate": 8.005779160903571e-07, - "loss": 0.7035, - "step": 1080 - }, - { - "epoch": 0.83, - "learning_rate": 7.937323358440935e-07, - "loss": 0.6988, - "step": 1081 - }, - { - "epoch": 0.83, - "learning_rate": 7.869136247623365e-07, - "loss": 0.7013, - "step": 1082 - }, - { - "epoch": 0.83, - "learning_rate": 7.801218264022453e-07, - "loss": 0.7088, - "step": 1083 - }, - { - "epoch": 0.83, - "learning_rate": 7.733569841490629e-07, - "loss": 0.6972, - "step": 1084 - }, - { - "epoch": 0.83, - "learning_rate": 7.666191412158403e-07, - "loss": 0.6858, - "step": 1085 - }, - { - "epoch": 0.83, - "learning_rate": 7.599083406431617e-07, - "loss": 0.7078, - "step": 1086 - }, - { - "epoch": 0.83, - "learning_rate": 7.532246252988617e-07, - "loss": 0.6828, - "step": 1087 - }, - { - "epoch": 0.83, - "learning_rate": 7.465680378777662e-07, - "loss": 0.7005, - "step": 1088 - }, - { - "epoch": 0.83, - "learning_rate": 7.399386209014054e-07, - "loss": 0.6927, - "step": 1089 - }, - { - "epoch": 0.83, - "learning_rate": 7.333364167177481e-07, - "loss": 0.6869, - "step": 1090 - }, - { - "epoch": 0.83, - "learning_rate": 7.267614675009322e-07, - "loss": 0.6914, - "step": 1091 - }, - { - "epoch": 0.84, - "learning_rate": 7.202138152509929e-07, - "loss": 0.6992, - "step": 1092 - }, - { - "epoch": 0.84, - "learning_rate": 7.13693501793597e-07, - "loss": 0.6901, - "step": 1093 - }, - { - "epoch": 0.84, - "learning_rate": 7.072005687797723e-07, - "loss": 0.6728, - "step": 1094 - }, - { - "epoch": 0.84, - "learning_rate": 7.007350576856442e-07, - "loss": 0.7052, - "step": 1095 - }, - { - "epoch": 0.84, - "learning_rate": 6.942970098121704e-07, - "loss": 0.6781, - "step": 1096 - }, - { - "epoch": 0.84, - "learning_rate": 6.878864662848755e-07, - "loss": 0.7024, - "step": 1097 - }, - { - "epoch": 0.84, - "learning_rate": 6.815034680535915e-07, - "loss": 0.7083, - "step": 1098 - }, - { - "epoch": 0.84, - "learning_rate": 6.751480558921919e-07, - "loss": 0.6986, - "step": 1099 - }, - { - "epoch": 0.84, - "learning_rate": 6.688202703983348e-07, - "loss": 0.7016, - "step": 1100 - }, - { - "epoch": 0.84, - "learning_rate": 6.625201519932018e-07, - "loss": 0.7088, - "step": 1101 - }, - { - "epoch": 0.84, - "learning_rate": 6.562477409212404e-07, - "loss": 0.6875, - "step": 1102 - }, - { - "epoch": 0.84, - "learning_rate": 6.500030772499061e-07, - "loss": 0.6862, - "step": 1103 - }, - { - "epoch": 0.84, - "learning_rate": 6.437862008694085e-07, - "loss": 0.6797, - "step": 1104 - }, - { - "epoch": 0.85, - "learning_rate": 6.375971514924539e-07, - "loss": 0.7171, - "step": 1105 - }, - { - "epoch": 0.85, - "learning_rate": 6.314359686539917e-07, - "loss": 0.7086, - "step": 1106 - }, - { - "epoch": 0.85, - "learning_rate": 6.253026917109678e-07, - "loss": 0.7074, - "step": 1107 - }, - { - "epoch": 0.85, - "learning_rate": 6.191973598420631e-07, - "loss": 0.7049, - "step": 1108 - }, - { - "epoch": 0.85, - "learning_rate": 6.131200120474512e-07, - "loss": 0.6803, - "step": 1109 - }, - { - "epoch": 0.85, - "learning_rate": 6.07070687148546e-07, - "loss": 0.6985, - "step": 1110 - }, - { - "epoch": 0.85, - "learning_rate": 6.010494237877534e-07, - "loss": 0.7024, - "step": 1111 - }, - { - "epoch": 0.85, - "learning_rate": 5.950562604282267e-07, - "loss": 0.6908, - "step": 1112 - }, - { - "epoch": 0.85, - "learning_rate": 5.890912353536188e-07, - "loss": 0.7023, - "step": 1113 - }, - { - "epoch": 0.85, - "learning_rate": 5.831543866678391e-07, - "loss": 0.6944, - "step": 1114 - }, - { - "epoch": 0.85, - "learning_rate": 5.772457522948077e-07, - "loss": 0.7271, - "step": 1115 - }, - { - "epoch": 0.85, - "learning_rate": 5.713653699782179e-07, - "loss": 0.7157, - "step": 1116 - }, - { - "epoch": 0.85, - "learning_rate": 5.655132772812899e-07, - "loss": 0.7093, - "step": 1117 - }, - { - "epoch": 0.86, - "learning_rate": 5.59689511586532e-07, - "loss": 0.6889, - "step": 1118 - }, - { - "epoch": 0.86, - "learning_rate": 5.538941100955053e-07, - "loss": 0.7112, - "step": 1119 - }, - { - "epoch": 0.86, - "learning_rate": 5.481271098285818e-07, - "loss": 0.6972, - "step": 1120 - }, - { - "epoch": 0.86, - "learning_rate": 5.423885476247104e-07, - "loss": 0.7056, - "step": 1121 - }, - { - "epoch": 0.86, - "learning_rate": 5.366784601411807e-07, - "loss": 0.6812, - "step": 1122 - }, - { - "epoch": 0.86, - "learning_rate": 5.309968838533891e-07, - "loss": 0.7162, - "step": 1123 - }, - { - "epoch": 0.86, - "learning_rate": 5.253438550546042e-07, - "loss": 0.6811, - "step": 1124 - }, - { - "epoch": 0.86, - "learning_rate": 5.197194098557401e-07, - "loss": 0.6966, - "step": 1125 - }, - { - "epoch": 0.86, - "learning_rate": 5.141235841851189e-07, - "loss": 0.6986, - "step": 1126 - }, - { - "epoch": 0.86, - "learning_rate": 5.085564137882448e-07, - "loss": 0.7063, - "step": 1127 - }, - { - "epoch": 0.86, - "learning_rate": 5.030179342275765e-07, - "loss": 0.6933, - "step": 1128 - }, - { - "epoch": 0.86, - "learning_rate": 4.975081808822979e-07, - "loss": 0.7102, - "step": 1129 - }, - { - "epoch": 0.86, - "learning_rate": 4.920271889480933e-07, - "loss": 0.6942, - "step": 1130 - }, - { - "epoch": 0.87, - "learning_rate": 4.865749934369224e-07, - "loss": 0.7335, - "step": 1131 - }, - { - "epoch": 0.87, - "learning_rate": 4.811516291767964e-07, - "loss": 0.703, - "step": 1132 - }, - { - "epoch": 0.87, - "learning_rate": 4.757571308115555e-07, - "loss": 0.7029, - "step": 1133 - }, - { - "epoch": 0.87, - "learning_rate": 4.703915328006492e-07, - "loss": 0.7031, - "step": 1134 - }, - { - "epoch": 0.87, - "learning_rate": 4.650548694189139e-07, - "loss": 0.7114, - "step": 1135 - }, - { - "epoch": 0.87, - "learning_rate": 4.597471747563542e-07, - "loss": 0.7106, - "step": 1136 - }, - { - "epoch": 0.87, - "learning_rate": 4.544684827179269e-07, - "loss": 0.7151, - "step": 1137 - }, - { - "epoch": 0.87, - "learning_rate": 4.4921882702332165e-07, - "loss": 0.7114, - "step": 1138 - }, - { - "epoch": 0.87, - "learning_rate": 4.4399824120674897e-07, - "loss": 0.6883, - "step": 1139 - }, - { - "epoch": 0.87, - "learning_rate": 4.388067586167233e-07, - "loss": 0.7035, - "step": 1140 - }, - { - "epoch": 0.87, - "learning_rate": 4.3364441241585175e-07, - "loss": 0.7296, - "step": 1141 - }, - { - "epoch": 0.87, - "learning_rate": 4.2851123558061927e-07, - "loss": 0.7015, - "step": 1142 - }, - { - "epoch": 0.87, - "learning_rate": 4.234072609011841e-07, - "loss": 0.6942, - "step": 1143 - }, - { - "epoch": 0.88, - "learning_rate": 4.183325209811617e-07, - "loss": 0.6933, - "step": 1144 - }, - { - "epoch": 0.88, - "learning_rate": 4.132870482374185e-07, - "loss": 0.6998, - "step": 1145 - }, - { - "epoch": 0.88, - "learning_rate": 4.082708748998676e-07, - "loss": 0.7104, - "step": 1146 - }, - { - "epoch": 0.88, - "learning_rate": 4.0328403301125973e-07, - "loss": 0.6964, - "step": 1147 - }, - { - "epoch": 0.88, - "learning_rate": 3.9832655442697876e-07, - "loss": 0.7007, - "step": 1148 - }, - { - "epoch": 0.88, - "learning_rate": 3.933984708148414e-07, - "loss": 0.7143, - "step": 1149 - }, - { - "epoch": 0.88, - "learning_rate": 3.884998136548901e-07, - "loss": 0.6891, - "step": 1150 - }, - { - "epoch": 0.88, - "learning_rate": 3.836306142391949e-07, - "loss": 0.712, - "step": 1151 - }, - { - "epoch": 0.88, - "learning_rate": 3.7879090367165585e-07, - "loss": 0.711, - "step": 1152 - }, - { - "epoch": 0.88, - "eval_loss": 1.267896056175232, - "eval_runtime": 126.2323, - "eval_samples_per_second": 94.817, - "eval_steps_per_second": 23.71, - "step": 1152 - } - ], - "max_steps": 1307, - "num_train_epochs": 1, - "total_flos": 4.611495605347287e+18, - "trial_name": null, - "trial_params": null -}