diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,14623 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.999229939935315, + "global_step": 2433, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.73972602739726e-07, + "loss": 1.658, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 5.47945205479452e-07, + "loss": 1.8738, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 8.219178082191781e-07, + "loss": 1.1654, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 1.095890410958904e-06, + "loss": 1.1991, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 1.3698630136986302e-06, + "loss": 1.0743, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 1.6438356164383561e-06, + "loss": 1.1493, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 1.9178082191780823e-06, + "loss": 1.1059, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 2.191780821917808e-06, + "loss": 1.0695, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 2.4657534246575345e-06, + "loss": 1.0905, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 2.7397260273972604e-06, + "loss": 1.2188, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 3.0136986301369864e-06, + "loss": 1.1695, + "step": 11 + }, + { + "epoch": 0.01, + "learning_rate": 3.2876712328767123e-06, + "loss": 1.0543, + "step": 12 + }, + { + "epoch": 0.02, + "learning_rate": 3.5616438356164386e-06, + "loss": 1.0257, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 3.8356164383561645e-06, + "loss": 1.001, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 4.109589041095891e-06, + "loss": 0.9035, + "step": 15 + }, + { + "epoch": 0.02, + "learning_rate": 4.383561643835616e-06, + "loss": 0.9979, + "step": 16 + }, + { + "epoch": 0.02, + "learning_rate": 4.657534246575343e-06, + "loss": 1.0299, + "step": 17 + }, + { + "epoch": 0.02, + "learning_rate": 4.931506849315069e-06, + "loss": 1.0091, + "step": 18 + }, + { + "epoch": 0.02, + "learning_rate": 5.2054794520547945e-06, + "loss": 0.9827, + "step": 19 + }, + { + "epoch": 0.02, + "learning_rate": 5.479452054794521e-06, + "loss": 1.0258, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 5.753424657534246e-06, + "loss": 1.0672, + "step": 21 + }, + { + "epoch": 0.03, + "learning_rate": 6.027397260273973e-06, + "loss": 1.0086, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 6.301369863013699e-06, + "loss": 1.0374, + "step": 23 + }, + { + "epoch": 0.03, + "learning_rate": 6.5753424657534245e-06, + "loss": 0.978, + "step": 24 + }, + { + "epoch": 0.03, + "learning_rate": 6.849315068493151e-06, + "loss": 1.0097, + "step": 25 + }, + { + "epoch": 0.03, + "learning_rate": 7.123287671232877e-06, + "loss": 1.0713, + "step": 26 + }, + { + "epoch": 0.03, + "learning_rate": 7.397260273972603e-06, + "loss": 1.0203, + "step": 27 + }, + { + "epoch": 0.03, + "learning_rate": 7.671232876712329e-06, + "loss": 1.0834, + "step": 28 + }, + { + "epoch": 0.04, + "learning_rate": 7.945205479452055e-06, + "loss": 1.0166, + "step": 29 + }, + { + "epoch": 0.04, + "learning_rate": 8.219178082191782e-06, + "loss": 1.0958, + "step": 30 + }, + { + "epoch": 0.04, + "learning_rate": 8.493150684931507e-06, + "loss": 0.962, + "step": 31 + }, + { + "epoch": 0.04, + "learning_rate": 8.767123287671233e-06, + "loss": 0.9332, + "step": 32 + }, + { + "epoch": 0.04, + "learning_rate": 9.04109589041096e-06, + "loss": 0.9713, + "step": 33 + }, + { + "epoch": 0.04, + "learning_rate": 9.315068493150685e-06, + "loss": 1.0534, + "step": 34 + }, + { + "epoch": 0.04, + "learning_rate": 9.589041095890411e-06, + "loss": 0.9566, + "step": 35 + }, + { + "epoch": 0.04, + "learning_rate": 9.863013698630138e-06, + "loss": 1.0108, + "step": 36 + }, + { + "epoch": 0.05, + "learning_rate": 1.0136986301369864e-05, + "loss": 0.8976, + "step": 37 + }, + { + "epoch": 0.05, + "learning_rate": 1.0410958904109589e-05, + "loss": 1.0651, + "step": 38 + }, + { + "epoch": 0.05, + "learning_rate": 1.0684931506849316e-05, + "loss": 1.0302, + "step": 39 + }, + { + "epoch": 0.05, + "learning_rate": 1.0958904109589042e-05, + "loss": 1.0534, + "step": 40 + }, + { + "epoch": 0.05, + "learning_rate": 1.1232876712328769e-05, + "loss": 0.9088, + "step": 41 + }, + { + "epoch": 0.05, + "learning_rate": 1.1506849315068493e-05, + "loss": 1.0085, + "step": 42 + }, + { + "epoch": 0.05, + "learning_rate": 1.178082191780822e-05, + "loss": 1.0221, + "step": 43 + }, + { + "epoch": 0.05, + "learning_rate": 1.2054794520547945e-05, + "loss": 1.0166, + "step": 44 + }, + { + "epoch": 0.06, + "learning_rate": 1.2328767123287673e-05, + "loss": 0.9398, + "step": 45 + }, + { + "epoch": 0.06, + "learning_rate": 1.2602739726027398e-05, + "loss": 1.0277, + "step": 46 + }, + { + "epoch": 0.06, + "learning_rate": 1.2876712328767125e-05, + "loss": 1.0425, + "step": 47 + }, + { + "epoch": 0.06, + "learning_rate": 1.3150684931506849e-05, + "loss": 0.8892, + "step": 48 + }, + { + "epoch": 0.06, + "learning_rate": 1.3424657534246576e-05, + "loss": 0.9969, + "step": 49 + }, + { + "epoch": 0.06, + "learning_rate": 1.3698630136986302e-05, + "loss": 0.9618, + "step": 50 + }, + { + "epoch": 0.06, + "learning_rate": 1.3972602739726029e-05, + "loss": 1.0422, + "step": 51 + }, + { + "epoch": 0.06, + "learning_rate": 1.4246575342465754e-05, + "loss": 0.9439, + "step": 52 + }, + { + "epoch": 0.07, + "learning_rate": 1.4520547945205482e-05, + "loss": 1.0713, + "step": 53 + }, + { + "epoch": 0.07, + "learning_rate": 1.4794520547945205e-05, + "loss": 1.1741, + "step": 54 + }, + { + "epoch": 0.07, + "learning_rate": 1.5068493150684933e-05, + "loss": 1.1307, + "step": 55 + }, + { + "epoch": 0.07, + "learning_rate": 1.5342465753424658e-05, + "loss": 1.0006, + "step": 56 + }, + { + "epoch": 0.07, + "learning_rate": 1.5616438356164384e-05, + "loss": 0.912, + "step": 57 + }, + { + "epoch": 0.07, + "learning_rate": 1.589041095890411e-05, + "loss": 0.8973, + "step": 58 + }, + { + "epoch": 0.07, + "learning_rate": 1.6164383561643838e-05, + "loss": 1.0237, + "step": 59 + }, + { + "epoch": 0.07, + "learning_rate": 1.6438356164383563e-05, + "loss": 0.9566, + "step": 60 + }, + { + "epoch": 0.08, + "learning_rate": 1.671232876712329e-05, + "loss": 0.9816, + "step": 61 + }, + { + "epoch": 0.08, + "learning_rate": 1.6986301369863014e-05, + "loss": 0.9593, + "step": 62 + }, + { + "epoch": 0.08, + "learning_rate": 1.726027397260274e-05, + "loss": 1.0471, + "step": 63 + }, + { + "epoch": 0.08, + "learning_rate": 1.7534246575342465e-05, + "loss": 1.0543, + "step": 64 + }, + { + "epoch": 0.08, + "learning_rate": 1.7808219178082194e-05, + "loss": 0.9724, + "step": 65 + }, + { + "epoch": 0.08, + "learning_rate": 1.808219178082192e-05, + "loss": 1.005, + "step": 66 + }, + { + "epoch": 0.08, + "learning_rate": 1.8356164383561645e-05, + "loss": 0.9581, + "step": 67 + }, + { + "epoch": 0.08, + "learning_rate": 1.863013698630137e-05, + "loss": 0.9424, + "step": 68 + }, + { + "epoch": 0.09, + "learning_rate": 1.8904109589041096e-05, + "loss": 1.0022, + "step": 69 + }, + { + "epoch": 0.09, + "learning_rate": 1.9178082191780822e-05, + "loss": 1.0554, + "step": 70 + }, + { + "epoch": 0.09, + "learning_rate": 1.945205479452055e-05, + "loss": 1.1595, + "step": 71 + }, + { + "epoch": 0.09, + "learning_rate": 1.9726027397260276e-05, + "loss": 0.9725, + "step": 72 + }, + { + "epoch": 0.09, + "learning_rate": 2e-05, + "loss": 0.9838, + "step": 73 + }, + { + "epoch": 0.09, + "learning_rate": 1.999999113975605e-05, + "loss": 1.0296, + "step": 74 + }, + { + "epoch": 0.09, + "learning_rate": 1.999996455903989e-05, + "loss": 1.0783, + "step": 75 + }, + { + "epoch": 0.09, + "learning_rate": 1.999992025789863e-05, + "loss": 1.0363, + "step": 76 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999858236410775e-05, + "loss": 0.9063, + "step": 77 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999778494686226e-05, + "loss": 0.8785, + "step": 78 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999681032866287e-05, + "loss": 0.9943, + "step": 79 + }, + { + "epoch": 0.1, + "learning_rate": 1.999956585112367e-05, + "loss": 0.9244, + "step": 80 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999432949662483e-05, + "loss": 0.8962, + "step": 81 + }, + { + "epoch": 0.1, + "learning_rate": 1.999928232871823e-05, + "loss": 1.0743, + "step": 82 + }, + { + "epoch": 0.1, + "learning_rate": 1.999911398855782e-05, + "loss": 0.974, + "step": 83 + }, + { + "epoch": 0.1, + "learning_rate": 1.999892792947956e-05, + "loss": 1.0517, + "step": 84 + }, + { + "epoch": 0.1, + "learning_rate": 1.9998724151813157e-05, + "loss": 0.9491, + "step": 85 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998502655919713e-05, + "loss": 1.0521, + "step": 86 + }, + { + "epoch": 0.11, + "learning_rate": 1.999826344219173e-05, + "loss": 1.0169, + "step": 87 + }, + { + "epoch": 0.11, + "learning_rate": 1.999800651105311e-05, + "loss": 1.0079, + "step": 88 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997731862959143e-05, + "loss": 1.0357, + "step": 89 + }, + { + "epoch": 0.11, + "learning_rate": 1.999743949839652e-05, + "loss": 0.9522, + "step": 90 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997129417883326e-05, + "loss": 0.9206, + "step": 91 + }, + { + "epoch": 0.11, + "learning_rate": 1.9996801621969037e-05, + "loss": 0.9974, + "step": 92 + }, + { + "epoch": 0.11, + "learning_rate": 1.999645611123453e-05, + "loss": 1.0022, + "step": 93 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996092886292055e-05, + "loss": 1.0249, + "step": 94 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995711947785276e-05, + "loss": 1.072, + "step": 95 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995313296389226e-05, + "loss": 1.0365, + "step": 96 + }, + { + "epoch": 0.12, + "learning_rate": 1.999489693281034e-05, + "loss": 1.015, + "step": 97 + }, + { + "epoch": 0.12, + "learning_rate": 1.9994462857786433e-05, + "loss": 0.8519, + "step": 98 + }, + { + "epoch": 0.12, + "learning_rate": 1.9994011072086707e-05, + "loss": 1.0599, + "step": 99 + }, + { + "epoch": 0.12, + "learning_rate": 1.999354157651175e-05, + "loss": 1.0633, + "step": 100 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993054371893526e-05, + "loss": 1.0066, + "step": 101 + }, + { + "epoch": 0.13, + "learning_rate": 1.999254945909539e-05, + "loss": 1.0184, + "step": 102 + }, + { + "epoch": 0.13, + "learning_rate": 1.999202683901207e-05, + "loss": 1.0657, + "step": 103 + }, + { + "epoch": 0.13, + "learning_rate": 1.9991486512569677e-05, + "loss": 0.9894, + "step": 104 + }, + { + "epoch": 0.13, + "learning_rate": 1.9990928480725694e-05, + "loss": 1.0008, + "step": 105 + }, + { + "epoch": 0.13, + "learning_rate": 1.999035274446898e-05, + "loss": 1.0979, + "step": 106 + }, + { + "epoch": 0.13, + "learning_rate": 1.9989759304819765e-05, + "loss": 0.9782, + "step": 107 + }, + { + "epoch": 0.13, + "learning_rate": 1.9989148162829663e-05, + "loss": 1.0324, + "step": 108 + }, + { + "epoch": 0.13, + "learning_rate": 1.9988519319581637e-05, + "loss": 1.1834, + "step": 109 + }, + { + "epoch": 0.14, + "learning_rate": 1.998787277619003e-05, + "loss": 1.1209, + "step": 110 + }, + { + "epoch": 0.14, + "learning_rate": 1.9987208533800552e-05, + "loss": 1.1269, + "step": 111 + }, + { + "epoch": 0.14, + "learning_rate": 1.9986526593590275e-05, + "loss": 1.0442, + "step": 112 + }, + { + "epoch": 0.14, + "learning_rate": 1.998582695676762e-05, + "loss": 1.0206, + "step": 113 + }, + { + "epoch": 0.14, + "learning_rate": 1.998510962457239e-05, + "loss": 0.926, + "step": 114 + }, + { + "epoch": 0.14, + "learning_rate": 1.9984374598275722e-05, + "loss": 0.9785, + "step": 115 + }, + { + "epoch": 0.14, + "learning_rate": 1.9983621879180125e-05, + "loss": 0.91, + "step": 116 + }, + { + "epoch": 0.14, + "learning_rate": 1.998285146861945e-05, + "loss": 1.0386, + "step": 117 + }, + { + "epoch": 0.15, + "learning_rate": 1.9982063367958907e-05, + "loss": 0.9223, + "step": 118 + }, + { + "epoch": 0.15, + "learning_rate": 1.9981257578595047e-05, + "loss": 1.0241, + "step": 119 + }, + { + "epoch": 0.15, + "learning_rate": 1.9980434101955762e-05, + "loss": 0.9925, + "step": 120 + }, + { + "epoch": 0.15, + "learning_rate": 1.99795929395003e-05, + "loss": 1.0568, + "step": 121 + }, + { + "epoch": 0.15, + "learning_rate": 1.9978734092719244e-05, + "loss": 1.028, + "step": 122 + }, + { + "epoch": 0.15, + "learning_rate": 1.9977857563134503e-05, + "loss": 1.1134, + "step": 123 + }, + { + "epoch": 0.15, + "learning_rate": 1.9976963352299337e-05, + "loss": 1.0476, + "step": 124 + }, + { + "epoch": 0.15, + "learning_rate": 1.997605146179833e-05, + "loss": 1.0234, + "step": 125 + }, + { + "epoch": 0.16, + "learning_rate": 1.9975121893247396e-05, + "loss": 0.9512, + "step": 126 + }, + { + "epoch": 0.16, + "learning_rate": 1.9974174648293774e-05, + "loss": 1.1104, + "step": 127 + }, + { + "epoch": 0.16, + "learning_rate": 1.9973209728616032e-05, + "loss": 1.197, + "step": 128 + }, + { + "epoch": 0.16, + "learning_rate": 1.997222713592405e-05, + "loss": 1.1371, + "step": 129 + }, + { + "epoch": 0.16, + "learning_rate": 1.9971226871959037e-05, + "loss": 1.1173, + "step": 130 + }, + { + "epoch": 0.16, + "learning_rate": 1.9970208938493504e-05, + "loss": 1.0441, + "step": 131 + }, + { + "epoch": 0.16, + "learning_rate": 1.9969173337331283e-05, + "loss": 1.0247, + "step": 132 + }, + { + "epoch": 0.16, + "learning_rate": 1.9968120070307503e-05, + "loss": 1.0264, + "step": 133 + }, + { + "epoch": 0.17, + "learning_rate": 1.996704913928861e-05, + "loss": 1.0072, + "step": 134 + }, + { + "epoch": 0.17, + "learning_rate": 1.9965960546172346e-05, + "loss": 1.1115, + "step": 135 + }, + { + "epoch": 0.17, + "learning_rate": 1.9964854292887747e-05, + "loss": 0.8799, + "step": 136 + }, + { + "epoch": 0.17, + "learning_rate": 1.9963730381395154e-05, + "loss": 1.0802, + "step": 137 + }, + { + "epoch": 0.17, + "learning_rate": 1.9962588813686187e-05, + "loss": 0.9735, + "step": 138 + }, + { + "epoch": 0.17, + "learning_rate": 1.9961429591783764e-05, + "loss": 1.0063, + "step": 139 + }, + { + "epoch": 0.17, + "learning_rate": 1.996025271774208e-05, + "loss": 0.9453, + "step": 140 + }, + { + "epoch": 0.17, + "learning_rate": 1.9959058193646618e-05, + "loss": 1.1692, + "step": 141 + }, + { + "epoch": 0.17, + "learning_rate": 1.9957846021614126e-05, + "loss": 0.971, + "step": 142 + }, + { + "epoch": 0.18, + "learning_rate": 1.9956616203792636e-05, + "loss": 1.0007, + "step": 143 + }, + { + "epoch": 0.18, + "learning_rate": 1.9955368742361445e-05, + "loss": 1.0786, + "step": 144 + }, + { + "epoch": 0.18, + "learning_rate": 1.9954103639531116e-05, + "loss": 0.9693, + "step": 145 + }, + { + "epoch": 0.18, + "learning_rate": 1.9952820897543468e-05, + "loss": 1.0557, + "step": 146 + }, + { + "epoch": 0.18, + "learning_rate": 1.9951520518671587e-05, + "loss": 1.014, + "step": 147 + }, + { + "epoch": 0.18, + "learning_rate": 1.9950202505219808e-05, + "loss": 0.9715, + "step": 148 + }, + { + "epoch": 0.18, + "learning_rate": 1.9948866859523717e-05, + "loss": 1.0267, + "step": 149 + }, + { + "epoch": 0.18, + "learning_rate": 1.994751358395014e-05, + "loss": 1.0039, + "step": 150 + }, + { + "epoch": 0.19, + "learning_rate": 1.9946142680897145e-05, + "loss": 0.9758, + "step": 151 + }, + { + "epoch": 0.19, + "learning_rate": 1.9944754152794044e-05, + "loss": 1.1135, + "step": 152 + }, + { + "epoch": 0.19, + "learning_rate": 1.9943348002101374e-05, + "loss": 1.0018, + "step": 153 + }, + { + "epoch": 0.19, + "learning_rate": 1.9941924231310903e-05, + "loss": 1.0246, + "step": 154 + }, + { + "epoch": 0.19, + "learning_rate": 1.994048284294562e-05, + "loss": 1.0571, + "step": 155 + }, + { + "epoch": 0.19, + "learning_rate": 1.9939023839559745e-05, + "loss": 1.0445, + "step": 156 + }, + { + "epoch": 0.19, + "learning_rate": 1.993754722373869e-05, + "loss": 0.967, + "step": 157 + }, + { + "epoch": 0.19, + "learning_rate": 1.99360529980991e-05, + "loss": 0.999, + "step": 158 + }, + { + "epoch": 0.2, + "learning_rate": 1.993454116528881e-05, + "loss": 0.9723, + "step": 159 + }, + { + "epoch": 0.2, + "learning_rate": 1.9933011727986865e-05, + "loss": 0.9921, + "step": 160 + }, + { + "epoch": 0.2, + "learning_rate": 1.9931464688903502e-05, + "loss": 1.0482, + "step": 161 + }, + { + "epoch": 0.2, + "learning_rate": 1.9929900050780147e-05, + "loss": 0.9949, + "step": 162 + }, + { + "epoch": 0.2, + "learning_rate": 1.9928317816389416e-05, + "loss": 0.9308, + "step": 163 + }, + { + "epoch": 0.2, + "learning_rate": 1.992671798853511e-05, + "loss": 1.0839, + "step": 164 + }, + { + "epoch": 0.2, + "learning_rate": 1.9925100570052194e-05, + "loss": 1.0279, + "step": 165 + }, + { + "epoch": 0.2, + "learning_rate": 1.9923465563806825e-05, + "loss": 1.1507, + "step": 166 + }, + { + "epoch": 0.21, + "learning_rate": 1.9921812972696298e-05, + "loss": 1.0505, + "step": 167 + }, + { + "epoch": 0.21, + "learning_rate": 1.9920142799649098e-05, + "loss": 0.9731, + "step": 168 + }, + { + "epoch": 0.21, + "learning_rate": 1.9918455047624847e-05, + "loss": 1.0994, + "step": 169 + }, + { + "epoch": 0.21, + "learning_rate": 1.9916749719614326e-05, + "loss": 1.0291, + "step": 170 + }, + { + "epoch": 0.21, + "learning_rate": 1.9915026818639457e-05, + "loss": 1.094, + "step": 171 + }, + { + "epoch": 0.21, + "learning_rate": 1.9913286347753306e-05, + "loss": 1.0935, + "step": 172 + }, + { + "epoch": 0.21, + "learning_rate": 1.9911528310040073e-05, + "loss": 0.9176, + "step": 173 + }, + { + "epoch": 0.21, + "learning_rate": 1.9909752708615088e-05, + "loss": 1.0972, + "step": 174 + }, + { + "epoch": 0.22, + "learning_rate": 1.99079595466248e-05, + "loss": 1.0049, + "step": 175 + }, + { + "epoch": 0.22, + "learning_rate": 1.990614882724678e-05, + "loss": 0.9358, + "step": 176 + }, + { + "epoch": 0.22, + "learning_rate": 1.990432055368971e-05, + "loss": 1.0514, + "step": 177 + }, + { + "epoch": 0.22, + "learning_rate": 1.9902474729193385e-05, + "loss": 1.0593, + "step": 178 + }, + { + "epoch": 0.22, + "learning_rate": 1.990061135702869e-05, + "loss": 0.8965, + "step": 179 + }, + { + "epoch": 0.22, + "learning_rate": 1.989873044049762e-05, + "loss": 0.9694, + "step": 180 + }, + { + "epoch": 0.22, + "learning_rate": 1.989683198293324e-05, + "loss": 1.1043, + "step": 181 + }, + { + "epoch": 0.22, + "learning_rate": 1.9894915987699718e-05, + "loss": 1.0442, + "step": 182 + }, + { + "epoch": 0.23, + "learning_rate": 1.9892982458192286e-05, + "loss": 0.9898, + "step": 183 + }, + { + "epoch": 0.23, + "learning_rate": 1.9891031397837258e-05, + "loss": 0.9948, + "step": 184 + }, + { + "epoch": 0.23, + "learning_rate": 1.9889062810092002e-05, + "loss": 1.0379, + "step": 185 + }, + { + "epoch": 0.23, + "learning_rate": 1.9887076698444953e-05, + "loss": 1.071, + "step": 186 + }, + { + "epoch": 0.23, + "learning_rate": 1.9885073066415596e-05, + "loss": 0.9517, + "step": 187 + }, + { + "epoch": 0.23, + "learning_rate": 1.9883051917554473e-05, + "loss": 1.0254, + "step": 188 + }, + { + "epoch": 0.23, + "learning_rate": 1.9881013255443152e-05, + "loss": 1.0169, + "step": 189 + }, + { + "epoch": 0.23, + "learning_rate": 1.987895708369424e-05, + "loss": 1.0932, + "step": 190 + }, + { + "epoch": 0.24, + "learning_rate": 1.9876883405951378e-05, + "loss": 1.1318, + "step": 191 + }, + { + "epoch": 0.24, + "learning_rate": 1.9874792225889223e-05, + "loss": 0.9104, + "step": 192 + }, + { + "epoch": 0.24, + "learning_rate": 1.9872683547213446e-05, + "loss": 1.0188, + "step": 193 + }, + { + "epoch": 0.24, + "learning_rate": 1.9870557373660733e-05, + "loss": 0.9453, + "step": 194 + }, + { + "epoch": 0.24, + "learning_rate": 1.986841370899876e-05, + "loss": 0.9985, + "step": 195 + }, + { + "epoch": 0.24, + "learning_rate": 1.9866252557026215e-05, + "loss": 0.9495, + "step": 196 + }, + { + "epoch": 0.24, + "learning_rate": 1.9864073921572756e-05, + "loss": 0.9413, + "step": 197 + }, + { + "epoch": 0.24, + "learning_rate": 1.9861877806499033e-05, + "loss": 0.9551, + "step": 198 + }, + { + "epoch": 0.25, + "learning_rate": 1.9859664215696676e-05, + "loss": 0.9801, + "step": 199 + }, + { + "epoch": 0.25, + "learning_rate": 1.9857433153088267e-05, + "loss": 1.163, + "step": 200 + }, + { + "epoch": 0.25, + "learning_rate": 1.9855184622627362e-05, + "loss": 1.0419, + "step": 201 + }, + { + "epoch": 0.25, + "learning_rate": 1.9852918628298466e-05, + "loss": 1.1131, + "step": 202 + }, + { + "epoch": 0.25, + "learning_rate": 1.9850635174117033e-05, + "loss": 1.0732, + "step": 203 + }, + { + "epoch": 0.25, + "learning_rate": 1.984833426412945e-05, + "loss": 0.998, + "step": 204 + }, + { + "epoch": 0.25, + "learning_rate": 1.9846015902413053e-05, + "loss": 1.1185, + "step": 205 + }, + { + "epoch": 0.25, + "learning_rate": 1.984368009307608e-05, + "loss": 1.0758, + "step": 206 + }, + { + "epoch": 0.26, + "learning_rate": 1.98413268402577e-05, + "loss": 0.8996, + "step": 207 + }, + { + "epoch": 0.26, + "learning_rate": 1.9838956148128004e-05, + "loss": 1.0708, + "step": 208 + }, + { + "epoch": 0.26, + "learning_rate": 1.9836568020887963e-05, + "loss": 1.1631, + "step": 209 + }, + { + "epoch": 0.26, + "learning_rate": 1.9834162462769454e-05, + "loss": 1.104, + "step": 210 + }, + { + "epoch": 0.26, + "learning_rate": 1.983173947803525e-05, + "loss": 1.1173, + "step": 211 + }, + { + "epoch": 0.26, + "learning_rate": 1.9829299070978997e-05, + "loss": 0.9888, + "step": 212 + }, + { + "epoch": 0.26, + "learning_rate": 1.982684124592521e-05, + "loss": 1.0857, + "step": 213 + }, + { + "epoch": 0.26, + "learning_rate": 1.9824366007229284e-05, + "loss": 1.0165, + "step": 214 + }, + { + "epoch": 0.26, + "learning_rate": 1.982187335927745e-05, + "loss": 0.9773, + "step": 215 + }, + { + "epoch": 0.27, + "learning_rate": 1.9819363306486814e-05, + "loss": 0.9627, + "step": 216 + }, + { + "epoch": 0.27, + "learning_rate": 1.9816835853305306e-05, + "loss": 1.0736, + "step": 217 + }, + { + "epoch": 0.27, + "learning_rate": 1.9814291004211695e-05, + "loss": 0.9905, + "step": 218 + }, + { + "epoch": 0.27, + "learning_rate": 1.9811728763715587e-05, + "loss": 0.9823, + "step": 219 + }, + { + "epoch": 0.27, + "learning_rate": 1.9809149136357387e-05, + "loss": 0.9941, + "step": 220 + }, + { + "epoch": 0.27, + "learning_rate": 1.9806552126708322e-05, + "loss": 1.0074, + "step": 221 + }, + { + "epoch": 0.27, + "learning_rate": 1.980393773937042e-05, + "loss": 0.9188, + "step": 222 + }, + { + "epoch": 0.27, + "learning_rate": 1.980130597897651e-05, + "loss": 0.9262, + "step": 223 + }, + { + "epoch": 0.28, + "learning_rate": 1.9798656850190192e-05, + "loss": 1.0517, + "step": 224 + }, + { + "epoch": 0.28, + "learning_rate": 1.9795990357705853e-05, + "loss": 1.007, + "step": 225 + }, + { + "epoch": 0.28, + "learning_rate": 1.979330650624865e-05, + "loss": 1.06, + "step": 226 + }, + { + "epoch": 0.28, + "learning_rate": 1.97906053005745e-05, + "loss": 1.1018, + "step": 227 + }, + { + "epoch": 0.28, + "learning_rate": 1.9787886745470067e-05, + "loss": 1.0161, + "step": 228 + }, + { + "epoch": 0.28, + "learning_rate": 1.978515084575276e-05, + "loss": 0.9865, + "step": 229 + }, + { + "epoch": 0.28, + "learning_rate": 1.9782397606270738e-05, + "loss": 1.2085, + "step": 230 + }, + { + "epoch": 0.28, + "learning_rate": 1.977962703190287e-05, + "loss": 0.9255, + "step": 231 + }, + { + "epoch": 0.29, + "learning_rate": 1.9776839127558744e-05, + "loss": 1.0249, + "step": 232 + }, + { + "epoch": 0.29, + "learning_rate": 1.9774033898178668e-05, + "loss": 0.9838, + "step": 233 + }, + { + "epoch": 0.29, + "learning_rate": 1.9771211348733644e-05, + "loss": 0.9712, + "step": 234 + }, + { + "epoch": 0.29, + "learning_rate": 1.976837148422537e-05, + "loss": 0.9958, + "step": 235 + }, + { + "epoch": 0.29, + "learning_rate": 1.976551430968622e-05, + "loss": 1.0248, + "step": 236 + }, + { + "epoch": 0.29, + "learning_rate": 1.976263983017925e-05, + "loss": 1.1418, + "step": 237 + }, + { + "epoch": 0.29, + "learning_rate": 1.9759748050798176e-05, + "loss": 0.9808, + "step": 238 + }, + { + "epoch": 0.29, + "learning_rate": 1.9756838976667373e-05, + "loss": 1.1026, + "step": 239 + }, + { + "epoch": 0.3, + "learning_rate": 1.9753912612941867e-05, + "loss": 1.1482, + "step": 240 + }, + { + "epoch": 0.3, + "learning_rate": 1.9750968964807305e-05, + "loss": 1.1188, + "step": 241 + }, + { + "epoch": 0.3, + "learning_rate": 1.9748008037479988e-05, + "loss": 1.0532, + "step": 242 + }, + { + "epoch": 0.3, + "learning_rate": 1.9745029836206813e-05, + "loss": 1.0273, + "step": 243 + }, + { + "epoch": 0.3, + "learning_rate": 1.9742034366265308e-05, + "loss": 1.0363, + "step": 244 + }, + { + "epoch": 0.3, + "learning_rate": 1.9739021632963584e-05, + "loss": 1.0821, + "step": 245 + }, + { + "epoch": 0.3, + "learning_rate": 1.9735991641640354e-05, + "loss": 0.9579, + "step": 246 + }, + { + "epoch": 0.3, + "learning_rate": 1.9732944397664915e-05, + "loss": 1.0749, + "step": 247 + }, + { + "epoch": 0.31, + "learning_rate": 1.9729879906437124e-05, + "loss": 0.9643, + "step": 248 + }, + { + "epoch": 0.31, + "learning_rate": 1.9726798173387417e-05, + "loss": 1.0482, + "step": 249 + }, + { + "epoch": 0.31, + "learning_rate": 1.9723699203976768e-05, + "loss": 1.0518, + "step": 250 + }, + { + "epoch": 0.31, + "learning_rate": 1.9720583003696708e-05, + "loss": 1.0277, + "step": 251 + }, + { + "epoch": 0.31, + "learning_rate": 1.971744957806929e-05, + "loss": 1.0539, + "step": 252 + }, + { + "epoch": 0.31, + "learning_rate": 1.97142989326471e-05, + "loss": 1.1261, + "step": 253 + }, + { + "epoch": 0.31, + "learning_rate": 1.971113107301324e-05, + "loss": 1.0665, + "step": 254 + }, + { + "epoch": 0.31, + "learning_rate": 1.9707946004781305e-05, + "loss": 1.0359, + "step": 255 + }, + { + "epoch": 0.32, + "learning_rate": 1.9704743733595394e-05, + "loss": 1.0673, + "step": 256 + }, + { + "epoch": 0.32, + "learning_rate": 1.9701524265130088e-05, + "loss": 0.9193, + "step": 257 + }, + { + "epoch": 0.32, + "learning_rate": 1.969828760509044e-05, + "loss": 1.0486, + "step": 258 + }, + { + "epoch": 0.32, + "learning_rate": 1.9695033759211972e-05, + "loss": 0.9321, + "step": 259 + }, + { + "epoch": 0.32, + "learning_rate": 1.969176273326066e-05, + "loss": 1.0751, + "step": 260 + }, + { + "epoch": 0.32, + "learning_rate": 1.9688474533032916e-05, + "loss": 1.0429, + "step": 261 + }, + { + "epoch": 0.32, + "learning_rate": 1.9685169164355594e-05, + "loss": 0.9218, + "step": 262 + }, + { + "epoch": 0.32, + "learning_rate": 1.9681846633085968e-05, + "loss": 1.1199, + "step": 263 + }, + { + "epoch": 0.33, + "learning_rate": 1.9678506945111727e-05, + "loss": 1.0038, + "step": 264 + }, + { + "epoch": 0.33, + "learning_rate": 1.9675150106350957e-05, + "loss": 0.987, + "step": 265 + }, + { + "epoch": 0.33, + "learning_rate": 1.9671776122752145e-05, + "loss": 0.9921, + "step": 266 + }, + { + "epoch": 0.33, + "learning_rate": 1.9668385000294156e-05, + "loss": 1.1113, + "step": 267 + }, + { + "epoch": 0.33, + "learning_rate": 1.966497674498622e-05, + "loss": 1.023, + "step": 268 + }, + { + "epoch": 0.33, + "learning_rate": 1.9661551362867926e-05, + "loss": 1.038, + "step": 269 + }, + { + "epoch": 0.33, + "learning_rate": 1.9658108860009234e-05, + "loss": 1.0056, + "step": 270 + }, + { + "epoch": 0.33, + "learning_rate": 1.9654649242510412e-05, + "loss": 1.0078, + "step": 271 + }, + { + "epoch": 0.34, + "learning_rate": 1.965117251650208e-05, + "loss": 0.9425, + "step": 272 + }, + { + "epoch": 0.34, + "learning_rate": 1.9647678688145163e-05, + "loss": 0.8794, + "step": 273 + }, + { + "epoch": 0.34, + "learning_rate": 1.9644167763630892e-05, + "loss": 0.9562, + "step": 274 + }, + { + "epoch": 0.34, + "learning_rate": 1.9640639749180804e-05, + "loss": 1.1024, + "step": 275 + }, + { + "epoch": 0.34, + "learning_rate": 1.9637094651046707e-05, + "loss": 1.0845, + "step": 276 + }, + { + "epoch": 0.34, + "learning_rate": 1.963353247551069e-05, + "loss": 1.0339, + "step": 277 + }, + { + "epoch": 0.34, + "learning_rate": 1.96299532288851e-05, + "loss": 1.0999, + "step": 278 + }, + { + "epoch": 0.34, + "learning_rate": 1.9626356917512538e-05, + "loss": 1.0032, + "step": 279 + }, + { + "epoch": 0.34, + "learning_rate": 1.9622743547765845e-05, + "loss": 1.0988, + "step": 280 + }, + { + "epoch": 0.35, + "learning_rate": 1.9619113126048086e-05, + "loss": 1.1032, + "step": 281 + }, + { + "epoch": 0.35, + "learning_rate": 1.9615465658792546e-05, + "loss": 0.9792, + "step": 282 + }, + { + "epoch": 0.35, + "learning_rate": 1.9611801152462715e-05, + "loss": 0.9861, + "step": 283 + }, + { + "epoch": 0.35, + "learning_rate": 1.9608119613552278e-05, + "loss": 1.0892, + "step": 284 + }, + { + "epoch": 0.35, + "learning_rate": 1.96044210485851e-05, + "loss": 1.0133, + "step": 285 + }, + { + "epoch": 0.35, + "learning_rate": 1.960070546411522e-05, + "loss": 1.0414, + "step": 286 + }, + { + "epoch": 0.35, + "learning_rate": 1.9596972866726835e-05, + "loss": 0.9747, + "step": 287 + }, + { + "epoch": 0.35, + "learning_rate": 1.9593223263034288e-05, + "loss": 0.9685, + "step": 288 + }, + { + "epoch": 0.36, + "learning_rate": 1.958945665968206e-05, + "loss": 0.8743, + "step": 289 + }, + { + "epoch": 0.36, + "learning_rate": 1.9585673063344753e-05, + "loss": 1.0342, + "step": 290 + }, + { + "epoch": 0.36, + "learning_rate": 1.9581872480727095e-05, + "loss": 1.0507, + "step": 291 + }, + { + "epoch": 0.36, + "learning_rate": 1.9578054918563893e-05, + "loss": 1.0207, + "step": 292 + }, + { + "epoch": 0.36, + "learning_rate": 1.9574220383620054e-05, + "loss": 1.0882, + "step": 293 + }, + { + "epoch": 0.36, + "learning_rate": 1.9570368882690572e-05, + "loss": 0.9782, + "step": 294 + }, + { + "epoch": 0.36, + "learning_rate": 1.9566500422600482e-05, + "loss": 1.1303, + "step": 295 + }, + { + "epoch": 0.36, + "learning_rate": 1.956261501020489e-05, + "loss": 1.2799, + "step": 296 + }, + { + "epoch": 0.37, + "learning_rate": 1.9558712652388932e-05, + "loss": 1.0535, + "step": 297 + }, + { + "epoch": 0.37, + "learning_rate": 1.955479335606778e-05, + "loss": 1.0697, + "step": 298 + }, + { + "epoch": 0.37, + "learning_rate": 1.955085712818662e-05, + "loss": 1.1006, + "step": 299 + }, + { + "epoch": 0.37, + "learning_rate": 1.9546903975720636e-05, + "loss": 1.0432, + "step": 300 + }, + { + "epoch": 0.37, + "learning_rate": 1.954293390567501e-05, + "loss": 1.0783, + "step": 301 + }, + { + "epoch": 0.37, + "learning_rate": 1.9538946925084898e-05, + "loss": 1.0622, + "step": 302 + }, + { + "epoch": 0.37, + "learning_rate": 1.9534943041015425e-05, + "loss": 0.8598, + "step": 303 + }, + { + "epoch": 0.37, + "learning_rate": 1.953092226056167e-05, + "loss": 1.0235, + "step": 304 + }, + { + "epoch": 0.38, + "learning_rate": 1.9526884590848646e-05, + "loss": 1.0579, + "step": 305 + }, + { + "epoch": 0.38, + "learning_rate": 1.952283003903131e-05, + "loss": 0.8823, + "step": 306 + }, + { + "epoch": 0.38, + "learning_rate": 1.951875861229452e-05, + "loss": 0.9815, + "step": 307 + }, + { + "epoch": 0.38, + "learning_rate": 1.9514670317853043e-05, + "loss": 1.0394, + "step": 308 + }, + { + "epoch": 0.38, + "learning_rate": 1.9510565162951538e-05, + "loss": 1.1034, + "step": 309 + }, + { + "epoch": 0.38, + "learning_rate": 1.9506443154864536e-05, + "loss": 1.0242, + "step": 310 + }, + { + "epoch": 0.38, + "learning_rate": 1.9502304300896442e-05, + "loss": 0.9295, + "step": 311 + }, + { + "epoch": 0.38, + "learning_rate": 1.9498148608381506e-05, + "loss": 0.9371, + "step": 312 + }, + { + "epoch": 0.39, + "learning_rate": 1.9493976084683814e-05, + "loss": 0.9686, + "step": 313 + }, + { + "epoch": 0.39, + "learning_rate": 1.9489786737197286e-05, + "loss": 1.0366, + "step": 314 + }, + { + "epoch": 0.39, + "learning_rate": 1.9485580573345645e-05, + "loss": 1.1104, + "step": 315 + }, + { + "epoch": 0.39, + "learning_rate": 1.9481357600582425e-05, + "loss": 1.0952, + "step": 316 + }, + { + "epoch": 0.39, + "learning_rate": 1.9477117826390934e-05, + "loss": 1.0771, + "step": 317 + }, + { + "epoch": 0.39, + "learning_rate": 1.947286125828426e-05, + "loss": 1.003, + "step": 318 + }, + { + "epoch": 0.39, + "learning_rate": 1.9468587903805256e-05, + "loss": 1.0663, + "step": 319 + }, + { + "epoch": 0.39, + "learning_rate": 1.9464297770526507e-05, + "loss": 1.0059, + "step": 320 + }, + { + "epoch": 0.4, + "learning_rate": 1.9459990866050337e-05, + "loss": 1.0338, + "step": 321 + }, + { + "epoch": 0.4, + "learning_rate": 1.94556671980088e-05, + "loss": 1.0562, + "step": 322 + }, + { + "epoch": 0.4, + "learning_rate": 1.9451326774063636e-05, + "loss": 1.1058, + "step": 323 + }, + { + "epoch": 0.4, + "learning_rate": 1.94469696019063e-05, + "loss": 1.1528, + "step": 324 + }, + { + "epoch": 0.4, + "learning_rate": 1.9442595689257898e-05, + "loss": 1.0276, + "step": 325 + }, + { + "epoch": 0.4, + "learning_rate": 1.9438205043869232e-05, + "loss": 1.0071, + "step": 326 + }, + { + "epoch": 0.4, + "learning_rate": 1.943379767352073e-05, + "loss": 1.0381, + "step": 327 + }, + { + "epoch": 0.4, + "learning_rate": 1.9429373586022472e-05, + "loss": 1.0016, + "step": 328 + }, + { + "epoch": 0.41, + "learning_rate": 1.9424932789214158e-05, + "loss": 0.9238, + "step": 329 + }, + { + "epoch": 0.41, + "learning_rate": 1.942047529096509e-05, + "loss": 1.0245, + "step": 330 + }, + { + "epoch": 0.41, + "learning_rate": 1.9416001099174183e-05, + "loss": 1.0655, + "step": 331 + }, + { + "epoch": 0.41, + "learning_rate": 1.941151022176991e-05, + "loss": 1.0373, + "step": 332 + }, + { + "epoch": 0.41, + "learning_rate": 1.9407002666710334e-05, + "loss": 1.0664, + "step": 333 + }, + { + "epoch": 0.41, + "learning_rate": 1.9402478441983058e-05, + "loss": 1.0436, + "step": 334 + }, + { + "epoch": 0.41, + "learning_rate": 1.9397937555605235e-05, + "loss": 0.9819, + "step": 335 + }, + { + "epoch": 0.41, + "learning_rate": 1.9393380015623535e-05, + "loss": 1.1562, + "step": 336 + }, + { + "epoch": 0.42, + "learning_rate": 1.9388805830114132e-05, + "loss": 0.8764, + "step": 337 + }, + { + "epoch": 0.42, + "learning_rate": 1.938421500718272e-05, + "loss": 1.1058, + "step": 338 + }, + { + "epoch": 0.42, + "learning_rate": 1.937960755496445e-05, + "loss": 0.9259, + "step": 339 + }, + { + "epoch": 0.42, + "learning_rate": 1.937498348162396e-05, + "loss": 0.9354, + "step": 340 + }, + { + "epoch": 0.42, + "learning_rate": 1.937034279535533e-05, + "loss": 0.9948, + "step": 341 + }, + { + "epoch": 0.42, + "learning_rate": 1.936568550438208e-05, + "loss": 1.0582, + "step": 342 + }, + { + "epoch": 0.42, + "learning_rate": 1.9361011616957165e-05, + "loss": 1.0361, + "step": 343 + }, + { + "epoch": 0.42, + "learning_rate": 1.935632114136293e-05, + "loss": 0.9712, + "step": 344 + }, + { + "epoch": 0.43, + "learning_rate": 1.9351614085911134e-05, + "loss": 1.1029, + "step": 345 + }, + { + "epoch": 0.43, + "learning_rate": 1.9346890458942914e-05, + "loss": 0.9734, + "step": 346 + }, + { + "epoch": 0.43, + "learning_rate": 1.9342150268828754e-05, + "loss": 1.0093, + "step": 347 + }, + { + "epoch": 0.43, + "learning_rate": 1.933739352396851e-05, + "loss": 1.0836, + "step": 348 + }, + { + "epoch": 0.43, + "learning_rate": 1.933262023279137e-05, + "loss": 1.0266, + "step": 349 + }, + { + "epoch": 0.43, + "learning_rate": 1.9327830403755832e-05, + "loss": 1.1339, + "step": 350 + }, + { + "epoch": 0.43, + "learning_rate": 1.9323024045349704e-05, + "loss": 0.9362, + "step": 351 + }, + { + "epoch": 0.43, + "learning_rate": 1.9318201166090097e-05, + "loss": 0.9422, + "step": 352 + }, + { + "epoch": 0.43, + "learning_rate": 1.9313361774523387e-05, + "loss": 1.1174, + "step": 353 + }, + { + "epoch": 0.44, + "learning_rate": 1.9308505879225207e-05, + "loss": 0.8944, + "step": 354 + }, + { + "epoch": 0.44, + "learning_rate": 1.930363348880044e-05, + "loss": 1.0049, + "step": 355 + }, + { + "epoch": 0.44, + "learning_rate": 1.92987446118832e-05, + "loss": 0.993, + "step": 356 + }, + { + "epoch": 0.44, + "learning_rate": 1.929383925713682e-05, + "loss": 1.0758, + "step": 357 + }, + { + "epoch": 0.44, + "learning_rate": 1.9288917433253823e-05, + "loss": 1.024, + "step": 358 + }, + { + "epoch": 0.44, + "learning_rate": 1.9283979148955927e-05, + "loss": 1.0144, + "step": 359 + }, + { + "epoch": 0.44, + "learning_rate": 1.927902441299401e-05, + "loss": 1.0515, + "step": 360 + }, + { + "epoch": 0.44, + "learning_rate": 1.92740532341481e-05, + "loss": 1.1693, + "step": 361 + }, + { + "epoch": 0.45, + "learning_rate": 1.9269065621227376e-05, + "loss": 1.1387, + "step": 362 + }, + { + "epoch": 0.45, + "learning_rate": 1.9264061583070126e-05, + "loss": 1.0449, + "step": 363 + }, + { + "epoch": 0.45, + "learning_rate": 1.9259041128543754e-05, + "loss": 0.9822, + "step": 364 + }, + { + "epoch": 0.45, + "learning_rate": 1.925400426654475e-05, + "loss": 1.0456, + "step": 365 + }, + { + "epoch": 0.45, + "learning_rate": 1.9248951005998678e-05, + "loss": 0.99, + "step": 366 + }, + { + "epoch": 0.45, + "learning_rate": 1.9243881355860163e-05, + "loss": 0.9867, + "step": 367 + }, + { + "epoch": 0.45, + "learning_rate": 1.9238795325112867e-05, + "loss": 1.1193, + "step": 368 + }, + { + "epoch": 0.45, + "learning_rate": 1.9233692922769497e-05, + "loss": 1.0152, + "step": 369 + }, + { + "epoch": 0.46, + "learning_rate": 1.9228574157871745e-05, + "loss": 0.9656, + "step": 370 + }, + { + "epoch": 0.46, + "learning_rate": 1.922343903949032e-05, + "loss": 1.0229, + "step": 371 + }, + { + "epoch": 0.46, + "learning_rate": 1.9218287576724903e-05, + "loss": 1.0, + "step": 372 + }, + { + "epoch": 0.46, + "learning_rate": 1.921311977870413e-05, + "loss": 1.0122, + "step": 373 + }, + { + "epoch": 0.46, + "learning_rate": 1.92079356545856e-05, + "loss": 1.0574, + "step": 374 + }, + { + "epoch": 0.46, + "learning_rate": 1.920273521355583e-05, + "loss": 0.9855, + "step": 375 + }, + { + "epoch": 0.46, + "learning_rate": 1.9197518464830255e-05, + "loss": 0.8773, + "step": 376 + }, + { + "epoch": 0.46, + "learning_rate": 1.9192285417653208e-05, + "loss": 1.0066, + "step": 377 + }, + { + "epoch": 0.47, + "learning_rate": 1.9187036081297907e-05, + "loss": 1.1173, + "step": 378 + }, + { + "epoch": 0.47, + "learning_rate": 1.9181770465066426e-05, + "loss": 1.0675, + "step": 379 + }, + { + "epoch": 0.47, + "learning_rate": 1.91764885782897e-05, + "loss": 1.0952, + "step": 380 + }, + { + "epoch": 0.47, + "learning_rate": 1.917119043032749e-05, + "loss": 0.898, + "step": 381 + }, + { + "epoch": 0.47, + "learning_rate": 1.9165876030568365e-05, + "loss": 1.0363, + "step": 382 + }, + { + "epoch": 0.47, + "learning_rate": 1.916054538842971e-05, + "loss": 1.1136, + "step": 383 + }, + { + "epoch": 0.47, + "learning_rate": 1.9155198513357677e-05, + "loss": 1.0828, + "step": 384 + }, + { + "epoch": 0.47, + "learning_rate": 1.9149835414827193e-05, + "loss": 0.9551, + "step": 385 + }, + { + "epoch": 0.48, + "learning_rate": 1.9144456102341928e-05, + "loss": 1.0687, + "step": 386 + }, + { + "epoch": 0.48, + "learning_rate": 1.9139060585434287e-05, + "loss": 0.9852, + "step": 387 + }, + { + "epoch": 0.48, + "learning_rate": 1.913364887366539e-05, + "loss": 0.9951, + "step": 388 + }, + { + "epoch": 0.48, + "learning_rate": 1.912822097662505e-05, + "loss": 1.1121, + "step": 389 + }, + { + "epoch": 0.48, + "learning_rate": 1.9122776903931776e-05, + "loss": 0.9408, + "step": 390 + }, + { + "epoch": 0.48, + "learning_rate": 1.9117316665232715e-05, + "loss": 1.1265, + "step": 391 + }, + { + "epoch": 0.48, + "learning_rate": 1.911184027020369e-05, + "loss": 0.9619, + "step": 392 + }, + { + "epoch": 0.48, + "learning_rate": 1.9106347728549134e-05, + "loss": 0.9748, + "step": 393 + }, + { + "epoch": 0.49, + "learning_rate": 1.9100839050002098e-05, + "loss": 1.0213, + "step": 394 + }, + { + "epoch": 0.49, + "learning_rate": 1.9095314244324233e-05, + "loss": 1.136, + "step": 395 + }, + { + "epoch": 0.49, + "learning_rate": 1.908977332130576e-05, + "loss": 1.0657, + "step": 396 + }, + { + "epoch": 0.49, + "learning_rate": 1.908421629076547e-05, + "loss": 0.9701, + "step": 397 + }, + { + "epoch": 0.49, + "learning_rate": 1.9078643162550686e-05, + "loss": 1.0995, + "step": 398 + }, + { + "epoch": 0.49, + "learning_rate": 1.9073053946537265e-05, + "loss": 0.9926, + "step": 399 + }, + { + "epoch": 0.49, + "learning_rate": 1.9067448652629573e-05, + "loss": 1.1041, + "step": 400 + }, + { + "epoch": 0.49, + "learning_rate": 1.9061827290760466e-05, + "loss": 0.9751, + "step": 401 + }, + { + "epoch": 0.5, + "learning_rate": 1.9056189870891266e-05, + "loss": 1.0071, + "step": 402 + }, + { + "epoch": 0.5, + "learning_rate": 1.905053640301176e-05, + "loss": 1.0108, + "step": 403 + }, + { + "epoch": 0.5, + "learning_rate": 1.9044866897140165e-05, + "loss": 0.9498, + "step": 404 + }, + { + "epoch": 0.5, + "learning_rate": 1.9039181363323128e-05, + "loss": 1.1555, + "step": 405 + }, + { + "epoch": 0.5, + "learning_rate": 1.9033479811635687e-05, + "loss": 1.0037, + "step": 406 + }, + { + "epoch": 0.5, + "learning_rate": 1.9027762252181272e-05, + "loss": 1.0307, + "step": 407 + }, + { + "epoch": 0.5, + "learning_rate": 1.9022028695091678e-05, + "loss": 1.1141, + "step": 408 + }, + { + "epoch": 0.5, + "learning_rate": 1.9016279150527044e-05, + "loss": 0.9423, + "step": 409 + }, + { + "epoch": 0.51, + "learning_rate": 1.901051362867585e-05, + "loss": 0.9885, + "step": 410 + }, + { + "epoch": 0.51, + "learning_rate": 1.9004732139754875e-05, + "loss": 1.0891, + "step": 411 + }, + { + "epoch": 0.51, + "learning_rate": 1.8998934694009207e-05, + "loss": 0.8694, + "step": 412 + }, + { + "epoch": 0.51, + "learning_rate": 1.8993121301712194e-05, + "loss": 1.0453, + "step": 413 + }, + { + "epoch": 0.51, + "learning_rate": 1.898729197316546e-05, + "loss": 1.064, + "step": 414 + }, + { + "epoch": 0.51, + "learning_rate": 1.898144671869885e-05, + "loss": 1.0784, + "step": 415 + }, + { + "epoch": 0.51, + "learning_rate": 1.8975585548670444e-05, + "loss": 1.0275, + "step": 416 + }, + { + "epoch": 0.51, + "learning_rate": 1.896970847346653e-05, + "loss": 1.0602, + "step": 417 + }, + { + "epoch": 0.52, + "learning_rate": 1.896381550350156e-05, + "loss": 0.986, + "step": 418 + }, + { + "epoch": 0.52, + "learning_rate": 1.8957906649218167e-05, + "loss": 0.9449, + "step": 419 + }, + { + "epoch": 0.52, + "learning_rate": 1.8951981921087133e-05, + "loss": 1.0912, + "step": 420 + }, + { + "epoch": 0.52, + "learning_rate": 1.8946041329607364e-05, + "loss": 0.9684, + "step": 421 + }, + { + "epoch": 0.52, + "learning_rate": 1.8940084885305875e-05, + "loss": 1.0854, + "step": 422 + }, + { + "epoch": 0.52, + "learning_rate": 1.8934112598737777e-05, + "loss": 0.9689, + "step": 423 + }, + { + "epoch": 0.52, + "learning_rate": 1.8928124480486258e-05, + "loss": 1.108, + "step": 424 + }, + { + "epoch": 0.52, + "learning_rate": 1.892212054116255e-05, + "loss": 0.9416, + "step": 425 + }, + { + "epoch": 0.52, + "learning_rate": 1.8916100791405925e-05, + "loss": 1.0297, + "step": 426 + }, + { + "epoch": 0.53, + "learning_rate": 1.891006524188368e-05, + "loss": 0.8853, + "step": 427 + }, + { + "epoch": 0.53, + "learning_rate": 1.89040139032911e-05, + "loss": 1.0542, + "step": 428 + }, + { + "epoch": 0.53, + "learning_rate": 1.889794678635145e-05, + "loss": 0.9289, + "step": 429 + }, + { + "epoch": 0.53, + "learning_rate": 1.8891863901815962e-05, + "loss": 0.9652, + "step": 430 + }, + { + "epoch": 0.53, + "learning_rate": 1.88857652604638e-05, + "loss": 0.8988, + "step": 431 + }, + { + "epoch": 0.53, + "learning_rate": 1.8879650873102055e-05, + "loss": 1.0342, + "step": 432 + }, + { + "epoch": 0.53, + "learning_rate": 1.8873520750565716e-05, + "loss": 0.9505, + "step": 433 + }, + { + "epoch": 0.53, + "learning_rate": 1.886737490371767e-05, + "loss": 0.9446, + "step": 434 + }, + { + "epoch": 0.54, + "learning_rate": 1.8861213343448645e-05, + "loss": 1.1764, + "step": 435 + }, + { + "epoch": 0.54, + "learning_rate": 1.885503608067724e-05, + "loss": 1.2689, + "step": 436 + }, + { + "epoch": 0.54, + "learning_rate": 1.884884312634985e-05, + "loss": 1.0402, + "step": 437 + }, + { + "epoch": 0.54, + "learning_rate": 1.8842634491440704e-05, + "loss": 1.0064, + "step": 438 + }, + { + "epoch": 0.54, + "learning_rate": 1.8836410186951805e-05, + "loss": 1.1046, + "step": 439 + }, + { + "epoch": 0.54, + "learning_rate": 1.883017022391292e-05, + "loss": 0.9783, + "step": 440 + }, + { + "epoch": 0.54, + "learning_rate": 1.8823914613381568e-05, + "loss": 0.9758, + "step": 441 + }, + { + "epoch": 0.54, + "learning_rate": 1.8817643366443e-05, + "loss": 0.9861, + "step": 442 + }, + { + "epoch": 0.55, + "learning_rate": 1.8811356494210166e-05, + "loss": 1.0803, + "step": 443 + }, + { + "epoch": 0.55, + "learning_rate": 1.8805054007823716e-05, + "loss": 1.0105, + "step": 444 + }, + { + "epoch": 0.55, + "learning_rate": 1.8798735918451963e-05, + "loss": 1.0224, + "step": 445 + }, + { + "epoch": 0.55, + "learning_rate": 1.8792402237290865e-05, + "loss": 1.0147, + "step": 446 + }, + { + "epoch": 0.55, + "learning_rate": 1.878605297556402e-05, + "loss": 1.057, + "step": 447 + }, + { + "epoch": 0.55, + "learning_rate": 1.8779688144522625e-05, + "loss": 0.9985, + "step": 448 + }, + { + "epoch": 0.55, + "learning_rate": 1.8773307755445468e-05, + "loss": 0.9799, + "step": 449 + }, + { + "epoch": 0.55, + "learning_rate": 1.8766911819638917e-05, + "loss": 1.0228, + "step": 450 + }, + { + "epoch": 0.56, + "learning_rate": 1.876050034843688e-05, + "loss": 0.9089, + "step": 451 + }, + { + "epoch": 0.56, + "learning_rate": 1.8754073353200796e-05, + "loss": 0.9996, + "step": 452 + }, + { + "epoch": 0.56, + "learning_rate": 1.874763084531961e-05, + "loss": 0.9506, + "step": 453 + }, + { + "epoch": 0.56, + "learning_rate": 1.8741172836209773e-05, + "loss": 1.0606, + "step": 454 + }, + { + "epoch": 0.56, + "learning_rate": 1.873469933731518e-05, + "loss": 0.9391, + "step": 455 + }, + { + "epoch": 0.56, + "learning_rate": 1.872821036010719e-05, + "loss": 0.9784, + "step": 456 + }, + { + "epoch": 0.56, + "learning_rate": 1.872170591608459e-05, + "loss": 0.9766, + "step": 457 + }, + { + "epoch": 0.56, + "learning_rate": 1.871518601677357e-05, + "loss": 0.9846, + "step": 458 + }, + { + "epoch": 0.57, + "learning_rate": 1.8708650673727708e-05, + "loss": 1.0851, + "step": 459 + }, + { + "epoch": 0.57, + "learning_rate": 1.8702099898527955e-05, + "loss": 0.8785, + "step": 460 + }, + { + "epoch": 0.57, + "learning_rate": 1.86955337027826e-05, + "loss": 1.0858, + "step": 461 + }, + { + "epoch": 0.57, + "learning_rate": 1.8688952098127265e-05, + "loss": 1.1099, + "step": 462 + }, + { + "epoch": 0.57, + "learning_rate": 1.8682355096224873e-05, + "loss": 1.0563, + "step": 463 + }, + { + "epoch": 0.57, + "learning_rate": 1.8675742708765633e-05, + "loss": 1.0595, + "step": 464 + }, + { + "epoch": 0.57, + "learning_rate": 1.866911494746702e-05, + "loss": 0.8897, + "step": 465 + }, + { + "epoch": 0.57, + "learning_rate": 1.866247182407375e-05, + "loss": 1.0525, + "step": 466 + }, + { + "epoch": 0.58, + "learning_rate": 1.8655813350357764e-05, + "loss": 0.9812, + "step": 467 + }, + { + "epoch": 0.58, + "learning_rate": 1.8649139538118196e-05, + "loss": 1.0569, + "step": 468 + }, + { + "epoch": 0.58, + "learning_rate": 1.8642450399181373e-05, + "loss": 1.156, + "step": 469 + }, + { + "epoch": 0.58, + "learning_rate": 1.8635745945400772e-05, + "loss": 1.0206, + "step": 470 + }, + { + "epoch": 0.58, + "learning_rate": 1.862902618865701e-05, + "loss": 0.9696, + "step": 471 + }, + { + "epoch": 0.58, + "learning_rate": 1.862229114085783e-05, + "loss": 1.0654, + "step": 472 + }, + { + "epoch": 0.58, + "learning_rate": 1.8615540813938063e-05, + "loss": 0.9939, + "step": 473 + }, + { + "epoch": 0.58, + "learning_rate": 1.8608775219859618e-05, + "loss": 1.008, + "step": 474 + }, + { + "epoch": 0.59, + "learning_rate": 1.8601994370611452e-05, + "loss": 0.9662, + "step": 475 + }, + { + "epoch": 0.59, + "learning_rate": 1.859519827820957e-05, + "loss": 0.9245, + "step": 476 + }, + { + "epoch": 0.59, + "learning_rate": 1.8588386954696972e-05, + "loss": 1.0275, + "step": 477 + }, + { + "epoch": 0.59, + "learning_rate": 1.8581560412143663e-05, + "loss": 0.9314, + "step": 478 + }, + { + "epoch": 0.59, + "learning_rate": 1.85747186626466e-05, + "loss": 0.9696, + "step": 479 + }, + { + "epoch": 0.59, + "learning_rate": 1.8567861718329705e-05, + "loss": 1.1141, + "step": 480 + }, + { + "epoch": 0.59, + "learning_rate": 1.856098959134381e-05, + "loss": 0.9806, + "step": 481 + }, + { + "epoch": 0.59, + "learning_rate": 1.855410229386667e-05, + "loss": 0.9459, + "step": 482 + }, + { + "epoch": 0.6, + "learning_rate": 1.8547199838102904e-05, + "loss": 1.0808, + "step": 483 + }, + { + "epoch": 0.6, + "learning_rate": 1.8540282236284005e-05, + "loss": 0.9773, + "step": 484 + }, + { + "epoch": 0.6, + "learning_rate": 1.8533349500668295e-05, + "loss": 1.1119, + "step": 485 + }, + { + "epoch": 0.6, + "learning_rate": 1.8526401643540924e-05, + "loss": 1.0087, + "step": 486 + }, + { + "epoch": 0.6, + "learning_rate": 1.8519438677213834e-05, + "loss": 1.0375, + "step": 487 + }, + { + "epoch": 0.6, + "learning_rate": 1.851246061402574e-05, + "loss": 1.0478, + "step": 488 + }, + { + "epoch": 0.6, + "learning_rate": 1.850546746634211e-05, + "loss": 1.0229, + "step": 489 + }, + { + "epoch": 0.6, + "learning_rate": 1.8498459246555143e-05, + "loss": 0.9939, + "step": 490 + }, + { + "epoch": 0.6, + "learning_rate": 1.849143596708375e-05, + "loss": 1.0645, + "step": 491 + }, + { + "epoch": 0.61, + "learning_rate": 1.8484397640373517e-05, + "loss": 0.8781, + "step": 492 + }, + { + "epoch": 0.61, + "learning_rate": 1.8477344278896708e-05, + "loss": 0.9516, + "step": 493 + }, + { + "epoch": 0.61, + "learning_rate": 1.8470275895152228e-05, + "loss": 1.0237, + "step": 494 + }, + { + "epoch": 0.61, + "learning_rate": 1.846319250166559e-05, + "loss": 1.069, + "step": 495 + }, + { + "epoch": 0.61, + "learning_rate": 1.8456094110988914e-05, + "loss": 1.0403, + "step": 496 + }, + { + "epoch": 0.61, + "learning_rate": 1.84489807357009e-05, + "loss": 1.0142, + "step": 497 + }, + { + "epoch": 0.61, + "learning_rate": 1.8441852388406788e-05, + "loss": 1.0581, + "step": 498 + }, + { + "epoch": 0.61, + "learning_rate": 1.8434709081738364e-05, + "loss": 0.9616, + "step": 499 + }, + { + "epoch": 0.62, + "learning_rate": 1.8427550828353912e-05, + "loss": 1.028, + "step": 500 + }, + { + "epoch": 0.62, + "learning_rate": 1.8420377640938204e-05, + "loss": 1.0888, + "step": 501 + }, + { + "epoch": 0.62, + "learning_rate": 1.8413189532202488e-05, + "loss": 1.0171, + "step": 502 + }, + { + "epoch": 0.62, + "learning_rate": 1.840598651488443e-05, + "loss": 1.0198, + "step": 503 + }, + { + "epoch": 0.62, + "learning_rate": 1.8398768601748143e-05, + "loss": 0.9213, + "step": 504 + }, + { + "epoch": 0.62, + "learning_rate": 1.839153580558411e-05, + "loss": 0.9659, + "step": 505 + }, + { + "epoch": 0.62, + "learning_rate": 1.8384288139209204e-05, + "loss": 1.0121, + "step": 506 + }, + { + "epoch": 0.62, + "learning_rate": 1.837702561546664e-05, + "loss": 0.9429, + "step": 507 + }, + { + "epoch": 0.63, + "learning_rate": 1.8369748247225965e-05, + "loss": 0.9574, + "step": 508 + }, + { + "epoch": 0.63, + "learning_rate": 1.8362456047383032e-05, + "loss": 0.8838, + "step": 509 + }, + { + "epoch": 0.63, + "learning_rate": 1.8355149028859975e-05, + "loss": 1.0912, + "step": 510 + }, + { + "epoch": 0.63, + "learning_rate": 1.8347827204605187e-05, + "loss": 1.0289, + "step": 511 + }, + { + "epoch": 0.63, + "learning_rate": 1.83404905875933e-05, + "loss": 0.9732, + "step": 512 + }, + { + "epoch": 0.63, + "learning_rate": 1.833313919082515e-05, + "loss": 0.9304, + "step": 513 + }, + { + "epoch": 0.63, + "learning_rate": 1.832577302732778e-05, + "loss": 0.9851, + "step": 514 + }, + { + "epoch": 0.63, + "learning_rate": 1.8318392110154387e-05, + "loss": 0.9973, + "step": 515 + }, + { + "epoch": 0.64, + "learning_rate": 1.8310996452384312e-05, + "loss": 1.033, + "step": 516 + }, + { + "epoch": 0.64, + "learning_rate": 1.8303586067123028e-05, + "loss": 1.1081, + "step": 517 + }, + { + "epoch": 0.64, + "learning_rate": 1.82961609675021e-05, + "loss": 1.0485, + "step": 518 + }, + { + "epoch": 0.64, + "learning_rate": 1.828872116667916e-05, + "loss": 1.0581, + "step": 519 + }, + { + "epoch": 0.64, + "learning_rate": 1.82812666778379e-05, + "loss": 1.0565, + "step": 520 + }, + { + "epoch": 0.64, + "learning_rate": 1.8273797514188043e-05, + "loss": 1.0394, + "step": 521 + }, + { + "epoch": 0.64, + "learning_rate": 1.8266313688965307e-05, + "loss": 0.9207, + "step": 522 + }, + { + "epoch": 0.64, + "learning_rate": 1.8258815215431395e-05, + "loss": 1.1348, + "step": 523 + }, + { + "epoch": 0.65, + "learning_rate": 1.825130210687397e-05, + "loss": 1.0388, + "step": 524 + }, + { + "epoch": 0.65, + "learning_rate": 1.824377437660663e-05, + "loss": 1.0929, + "step": 525 + }, + { + "epoch": 0.65, + "learning_rate": 1.8236232037968873e-05, + "loss": 0.9733, + "step": 526 + }, + { + "epoch": 0.65, + "learning_rate": 1.8228675104326096e-05, + "loss": 1.1005, + "step": 527 + }, + { + "epoch": 0.65, + "learning_rate": 1.8221103589069553e-05, + "loss": 1.0583, + "step": 528 + }, + { + "epoch": 0.65, + "learning_rate": 1.821351750561634e-05, + "loss": 1.0498, + "step": 529 + }, + { + "epoch": 0.65, + "learning_rate": 1.820591686740936e-05, + "loss": 1.0468, + "step": 530 + }, + { + "epoch": 0.65, + "learning_rate": 1.8198301687917325e-05, + "loss": 1.0576, + "step": 531 + }, + { + "epoch": 0.66, + "learning_rate": 1.8190671980634698e-05, + "loss": 1.0863, + "step": 532 + }, + { + "epoch": 0.66, + "learning_rate": 1.818302775908169e-05, + "loss": 0.9544, + "step": 533 + }, + { + "epoch": 0.66, + "learning_rate": 1.8175369036804243e-05, + "loss": 1.0569, + "step": 534 + }, + { + "epoch": 0.66, + "learning_rate": 1.8167695827373982e-05, + "loss": 0.9525, + "step": 535 + }, + { + "epoch": 0.66, + "learning_rate": 1.8160008144388212e-05, + "loss": 1.044, + "step": 536 + }, + { + "epoch": 0.66, + "learning_rate": 1.8152306001469875e-05, + "loss": 0.8825, + "step": 537 + }, + { + "epoch": 0.66, + "learning_rate": 1.814458941226755e-05, + "loss": 1.0051, + "step": 538 + }, + { + "epoch": 0.66, + "learning_rate": 1.8136858390455406e-05, + "loss": 0.964, + "step": 539 + }, + { + "epoch": 0.67, + "learning_rate": 1.8129112949733193e-05, + "loss": 1.2041, + "step": 540 + }, + { + "epoch": 0.67, + "learning_rate": 1.8121353103826213e-05, + "loss": 1.0948, + "step": 541 + }, + { + "epoch": 0.67, + "learning_rate": 1.8113578866485288e-05, + "loss": 0.9717, + "step": 542 + }, + { + "epoch": 0.67, + "learning_rate": 1.810579025148674e-05, + "loss": 1.0894, + "step": 543 + }, + { + "epoch": 0.67, + "learning_rate": 1.8097987272632384e-05, + "loss": 0.963, + "step": 544 + }, + { + "epoch": 0.67, + "learning_rate": 1.8090169943749477e-05, + "loss": 1.0476, + "step": 545 + }, + { + "epoch": 0.67, + "learning_rate": 1.8082338278690704e-05, + "loss": 1.1185, + "step": 546 + }, + { + "epoch": 0.67, + "learning_rate": 1.807449229133416e-05, + "loss": 1.0054, + "step": 547 + }, + { + "epoch": 0.68, + "learning_rate": 1.8066631995583318e-05, + "loss": 1.0131, + "step": 548 + }, + { + "epoch": 0.68, + "learning_rate": 1.8058757405367003e-05, + "loss": 1.1011, + "step": 549 + }, + { + "epoch": 0.68, + "learning_rate": 1.805086853463938e-05, + "loss": 1.0835, + "step": 550 + }, + { + "epoch": 0.68, + "learning_rate": 1.8042965397379904e-05, + "loss": 0.9314, + "step": 551 + }, + { + "epoch": 0.68, + "learning_rate": 1.8035048007593322e-05, + "loss": 1.0555, + "step": 552 + }, + { + "epoch": 0.68, + "learning_rate": 1.8027116379309637e-05, + "loss": 1.0529, + "step": 553 + }, + { + "epoch": 0.68, + "learning_rate": 1.8019170526584083e-05, + "loss": 0.9156, + "step": 554 + }, + { + "epoch": 0.68, + "learning_rate": 1.8011210463497095e-05, + "loss": 1.0872, + "step": 555 + }, + { + "epoch": 0.69, + "learning_rate": 1.8003236204154296e-05, + "loss": 0.9799, + "step": 556 + }, + { + "epoch": 0.69, + "learning_rate": 1.799524776268646e-05, + "loss": 1.0613, + "step": 557 + }, + { + "epoch": 0.69, + "learning_rate": 1.7987245153249496e-05, + "loss": 0.8917, + "step": 558 + }, + { + "epoch": 0.69, + "learning_rate": 1.7979228390024417e-05, + "loss": 1.006, + "step": 559 + }, + { + "epoch": 0.69, + "learning_rate": 1.7971197487217322e-05, + "loss": 1.07, + "step": 560 + }, + { + "epoch": 0.69, + "learning_rate": 1.796315245905936e-05, + "loss": 0.9515, + "step": 561 + }, + { + "epoch": 0.69, + "learning_rate": 1.795509331980672e-05, + "loss": 0.9812, + "step": 562 + }, + { + "epoch": 0.69, + "learning_rate": 1.7947020083740575e-05, + "loss": 1.137, + "step": 563 + }, + { + "epoch": 0.69, + "learning_rate": 1.7938932765167107e-05, + "loss": 1.1004, + "step": 564 + }, + { + "epoch": 0.7, + "learning_rate": 1.7930831378417437e-05, + "loss": 1.0575, + "step": 565 + }, + { + "epoch": 0.7, + "learning_rate": 1.792271593784761e-05, + "loss": 1.1174, + "step": 566 + }, + { + "epoch": 0.7, + "learning_rate": 1.7914586457838592e-05, + "loss": 1.0646, + "step": 567 + }, + { + "epoch": 0.7, + "learning_rate": 1.7906442952796212e-05, + "loss": 1.0319, + "step": 568 + }, + { + "epoch": 0.7, + "learning_rate": 1.7898285437151163e-05, + "loss": 1.0151, + "step": 569 + }, + { + "epoch": 0.7, + "learning_rate": 1.7890113925358954e-05, + "loss": 1.0234, + "step": 570 + }, + { + "epoch": 0.7, + "learning_rate": 1.788192843189991e-05, + "loss": 1.2047, + "step": 571 + }, + { + "epoch": 0.7, + "learning_rate": 1.7873728971279116e-05, + "loss": 1.0391, + "step": 572 + }, + { + "epoch": 0.71, + "learning_rate": 1.786551555802643e-05, + "loss": 0.9231, + "step": 573 + }, + { + "epoch": 0.71, + "learning_rate": 1.7857288206696405e-05, + "loss": 1.0342, + "step": 574 + }, + { + "epoch": 0.71, + "learning_rate": 1.784904693186832e-05, + "loss": 0.971, + "step": 575 + }, + { + "epoch": 0.71, + "learning_rate": 1.7840791748146112e-05, + "loss": 1.0648, + "step": 576 + }, + { + "epoch": 0.71, + "learning_rate": 1.783252267015837e-05, + "loss": 1.0146, + "step": 577 + }, + { + "epoch": 0.71, + "learning_rate": 1.7824239712558303e-05, + "loss": 0.978, + "step": 578 + }, + { + "epoch": 0.71, + "learning_rate": 1.7815942890023716e-05, + "loss": 1.144, + "step": 579 + }, + { + "epoch": 0.71, + "learning_rate": 1.7807632217256988e-05, + "loss": 0.9478, + "step": 580 + }, + { + "epoch": 0.72, + "learning_rate": 1.779930770898503e-05, + "loss": 1.0221, + "step": 581 + }, + { + "epoch": 0.72, + "learning_rate": 1.7790969379959276e-05, + "loss": 1.1083, + "step": 582 + }, + { + "epoch": 0.72, + "learning_rate": 1.778261724495566e-05, + "loss": 0.9185, + "step": 583 + }, + { + "epoch": 0.72, + "learning_rate": 1.7774251318774568e-05, + "loss": 1.0508, + "step": 584 + }, + { + "epoch": 0.72, + "learning_rate": 1.776587161624083e-05, + "loss": 0.9467, + "step": 585 + }, + { + "epoch": 0.72, + "learning_rate": 1.7757478152203683e-05, + "loss": 1.1174, + "step": 586 + }, + { + "epoch": 0.72, + "learning_rate": 1.7749070941536763e-05, + "loss": 0.985, + "step": 587 + }, + { + "epoch": 0.72, + "learning_rate": 1.774064999913805e-05, + "loss": 1.0967, + "step": 588 + }, + { + "epoch": 0.73, + "learning_rate": 1.7732215339929874e-05, + "loss": 0.9661, + "step": 589 + }, + { + "epoch": 0.73, + "learning_rate": 1.772376697885885e-05, + "loss": 0.983, + "step": 590 + }, + { + "epoch": 0.73, + "learning_rate": 1.7715304930895894e-05, + "loss": 1.1112, + "step": 591 + }, + { + "epoch": 0.73, + "learning_rate": 1.7706829211036172e-05, + "loss": 0.9431, + "step": 592 + }, + { + "epoch": 0.73, + "learning_rate": 1.7698339834299064e-05, + "loss": 0.91, + "step": 593 + }, + { + "epoch": 0.73, + "learning_rate": 1.7689836815728164e-05, + "loss": 0.9477, + "step": 594 + }, + { + "epoch": 0.73, + "learning_rate": 1.7681320170391236e-05, + "loss": 0.9234, + "step": 595 + }, + { + "epoch": 0.73, + "learning_rate": 1.7672789913380192e-05, + "loss": 0.9149, + "step": 596 + }, + { + "epoch": 0.74, + "learning_rate": 1.7664246059811058e-05, + "loss": 1.012, + "step": 597 + }, + { + "epoch": 0.74, + "learning_rate": 1.765568862482397e-05, + "loss": 1.1162, + "step": 598 + }, + { + "epoch": 0.74, + "learning_rate": 1.7647117623583107e-05, + "loss": 1.1264, + "step": 599 + }, + { + "epoch": 0.74, + "learning_rate": 1.7638533071276712e-05, + "loss": 0.9201, + "step": 600 + }, + { + "epoch": 0.74, + "learning_rate": 1.7629934983117025e-05, + "loss": 0.9787, + "step": 601 + }, + { + "epoch": 0.74, + "learning_rate": 1.762132337434028e-05, + "loss": 1.0927, + "step": 602 + }, + { + "epoch": 0.74, + "learning_rate": 1.7612698260206668e-05, + "loss": 0.9746, + "step": 603 + }, + { + "epoch": 0.74, + "learning_rate": 1.7604059656000313e-05, + "loss": 1.0678, + "step": 604 + }, + { + "epoch": 0.75, + "learning_rate": 1.759540757702924e-05, + "loss": 0.9678, + "step": 605 + }, + { + "epoch": 0.75, + "learning_rate": 1.7586742038625357e-05, + "loss": 0.9799, + "step": 606 + }, + { + "epoch": 0.75, + "learning_rate": 1.757806305614442e-05, + "loss": 1.0382, + "step": 607 + }, + { + "epoch": 0.75, + "learning_rate": 1.7569370644966007e-05, + "loss": 1.1101, + "step": 608 + }, + { + "epoch": 0.75, + "learning_rate": 1.7560664820493502e-05, + "loss": 1.1365, + "step": 609 + }, + { + "epoch": 0.75, + "learning_rate": 1.7551945598154044e-05, + "loss": 1.115, + "step": 610 + }, + { + "epoch": 0.75, + "learning_rate": 1.754321299339852e-05, + "loss": 0.9999, + "step": 611 + }, + { + "epoch": 0.75, + "learning_rate": 1.753446702170154e-05, + "loss": 0.999, + "step": 612 + }, + { + "epoch": 0.76, + "learning_rate": 1.7525707698561383e-05, + "loss": 1.0083, + "step": 613 + }, + { + "epoch": 0.76, + "learning_rate": 1.7516935039500007e-05, + "loss": 1.0636, + "step": 614 + }, + { + "epoch": 0.76, + "learning_rate": 1.750814906006298e-05, + "loss": 1.0376, + "step": 615 + }, + { + "epoch": 0.76, + "learning_rate": 1.7499349775819497e-05, + "loss": 1.1103, + "step": 616 + }, + { + "epoch": 0.76, + "learning_rate": 1.7490537202362313e-05, + "loss": 1.0945, + "step": 617 + }, + { + "epoch": 0.76, + "learning_rate": 1.7481711355307735e-05, + "loss": 1.0328, + "step": 618 + }, + { + "epoch": 0.76, + "learning_rate": 1.7472872250295603e-05, + "loss": 1.0303, + "step": 619 + }, + { + "epoch": 0.76, + "learning_rate": 1.7464019902989234e-05, + "loss": 1.0486, + "step": 620 + }, + { + "epoch": 0.77, + "learning_rate": 1.7455154329075427e-05, + "loss": 1.0767, + "step": 621 + }, + { + "epoch": 0.77, + "learning_rate": 1.744627554426441e-05, + "loss": 0.9857, + "step": 622 + }, + { + "epoch": 0.77, + "learning_rate": 1.7437383564289816e-05, + "loss": 1.0462, + "step": 623 + }, + { + "epoch": 0.77, + "learning_rate": 1.7428478404908675e-05, + "loss": 1.042, + "step": 624 + }, + { + "epoch": 0.77, + "learning_rate": 1.741956008190136e-05, + "loss": 1.1192, + "step": 625 + }, + { + "epoch": 0.77, + "learning_rate": 1.7410628611071576e-05, + "loss": 1.0238, + "step": 626 + }, + { + "epoch": 0.77, + "learning_rate": 1.7401684008246326e-05, + "loss": 1.0647, + "step": 627 + }, + { + "epoch": 0.77, + "learning_rate": 1.739272628927588e-05, + "loss": 1.0534, + "step": 628 + }, + { + "epoch": 0.77, + "learning_rate": 1.7383755470033756e-05, + "loss": 1.0037, + "step": 629 + }, + { + "epoch": 0.78, + "learning_rate": 1.7374771566416684e-05, + "loss": 1.0622, + "step": 630 + }, + { + "epoch": 0.78, + "learning_rate": 1.7365774594344572e-05, + "loss": 1.0817, + "step": 631 + }, + { + "epoch": 0.78, + "learning_rate": 1.73567645697605e-05, + "loss": 1.0678, + "step": 632 + }, + { + "epoch": 0.78, + "learning_rate": 1.7347741508630673e-05, + "loss": 0.956, + "step": 633 + }, + { + "epoch": 0.78, + "learning_rate": 1.7338705426944393e-05, + "loss": 0.9309, + "step": 634 + }, + { + "epoch": 0.78, + "learning_rate": 1.7329656340714037e-05, + "loss": 0.9983, + "step": 635 + }, + { + "epoch": 0.78, + "learning_rate": 1.7320594265975025e-05, + "loss": 0.9068, + "step": 636 + }, + { + "epoch": 0.78, + "learning_rate": 1.73115192187858e-05, + "loss": 0.8823, + "step": 637 + }, + { + "epoch": 0.79, + "learning_rate": 1.7302431215227782e-05, + "loss": 0.9902, + "step": 638 + }, + { + "epoch": 0.79, + "learning_rate": 1.7293330271405367e-05, + "loss": 0.9584, + "step": 639 + }, + { + "epoch": 0.79, + "learning_rate": 1.7284216403445865e-05, + "loss": 0.9598, + "step": 640 + }, + { + "epoch": 0.79, + "learning_rate": 1.7275089627499493e-05, + "loss": 1.0631, + "step": 641 + }, + { + "epoch": 0.79, + "learning_rate": 1.7265949959739345e-05, + "loss": 0.982, + "step": 642 + }, + { + "epoch": 0.79, + "learning_rate": 1.725679741636136e-05, + "loss": 0.9639, + "step": 643 + }, + { + "epoch": 0.79, + "learning_rate": 1.7247632013584296e-05, + "loss": 1.033, + "step": 644 + }, + { + "epoch": 0.79, + "learning_rate": 1.7238453767649683e-05, + "loss": 1.037, + "step": 645 + }, + { + "epoch": 0.8, + "learning_rate": 1.7229262694821825e-05, + "loss": 1.0494, + "step": 646 + }, + { + "epoch": 0.8, + "learning_rate": 1.7220058811387754e-05, + "loss": 1.0285, + "step": 647 + }, + { + "epoch": 0.8, + "learning_rate": 1.7210842133657197e-05, + "loss": 0.9003, + "step": 648 + }, + { + "epoch": 0.8, + "learning_rate": 1.720161267796256e-05, + "loss": 0.9587, + "step": 649 + }, + { + "epoch": 0.8, + "learning_rate": 1.7192370460658888e-05, + "loss": 0.8851, + "step": 650 + }, + { + "epoch": 0.8, + "learning_rate": 1.7183115498123843e-05, + "loss": 0.9959, + "step": 651 + }, + { + "epoch": 0.8, + "learning_rate": 1.7173847806757662e-05, + "loss": 1.024, + "step": 652 + }, + { + "epoch": 0.8, + "learning_rate": 1.7164567402983153e-05, + "loss": 1.1369, + "step": 653 + }, + { + "epoch": 0.81, + "learning_rate": 1.7155274303245642e-05, + "loss": 1.0514, + "step": 654 + }, + { + "epoch": 0.81, + "learning_rate": 1.714596852401296e-05, + "loss": 0.9785, + "step": 655 + }, + { + "epoch": 0.81, + "learning_rate": 1.7136650081775395e-05, + "loss": 1.1178, + "step": 656 + }, + { + "epoch": 0.81, + "learning_rate": 1.7127318993045686e-05, + "loss": 1.0403, + "step": 657 + }, + { + "epoch": 0.81, + "learning_rate": 1.7117975274358975e-05, + "loss": 1.0075, + "step": 658 + }, + { + "epoch": 0.81, + "learning_rate": 1.7108618942272786e-05, + "loss": 1.0077, + "step": 659 + }, + { + "epoch": 0.81, + "learning_rate": 1.7099250013367e-05, + "loss": 1.0364, + "step": 660 + }, + { + "epoch": 0.81, + "learning_rate": 1.7089868504243816e-05, + "loss": 0.9999, + "step": 661 + }, + { + "epoch": 0.82, + "learning_rate": 1.7080474431527724e-05, + "loss": 1.0608, + "step": 662 + }, + { + "epoch": 0.82, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.9611, + "step": 663 + }, + { + "epoch": 0.82, + "learning_rate": 1.7061648661926068e-05, + "loss": 1.082, + "step": 664 + }, + { + "epoch": 0.82, + "learning_rate": 1.705221699840069e-05, + "loss": 1.0361, + "step": 665 + }, + { + "epoch": 0.82, + "learning_rate": 1.7042772838002704e-05, + "loss": 0.974, + "step": 666 + }, + { + "epoch": 0.82, + "learning_rate": 1.7033316197467634e-05, + "loss": 1.045, + "step": 667 + }, + { + "epoch": 0.82, + "learning_rate": 1.70238470935531e-05, + "loss": 1.0623, + "step": 668 + }, + { + "epoch": 0.82, + "learning_rate": 1.701436554303882e-05, + "loss": 0.9795, + "step": 669 + }, + { + "epoch": 0.83, + "learning_rate": 1.7004871562726563e-05, + "loss": 1.0353, + "step": 670 + }, + { + "epoch": 0.83, + "learning_rate": 1.699536516944013e-05, + "loss": 1.0013, + "step": 671 + }, + { + "epoch": 0.83, + "learning_rate": 1.69858463800253e-05, + "loss": 1.1328, + "step": 672 + }, + { + "epoch": 0.83, + "learning_rate": 1.6976315211349848e-05, + "loss": 1.0368, + "step": 673 + }, + { + "epoch": 0.83, + "learning_rate": 1.6966771680303462e-05, + "loss": 0.9242, + "step": 674 + }, + { + "epoch": 0.83, + "learning_rate": 1.6957215803797748e-05, + "loss": 1.0086, + "step": 675 + }, + { + "epoch": 0.83, + "learning_rate": 1.6947647598766183e-05, + "loss": 1.0575, + "step": 676 + }, + { + "epoch": 0.83, + "learning_rate": 1.6938067082164093e-05, + "loss": 1.0118, + "step": 677 + }, + { + "epoch": 0.84, + "learning_rate": 1.692847427096862e-05, + "loss": 0.9457, + "step": 678 + }, + { + "epoch": 0.84, + "learning_rate": 1.6918869182178698e-05, + "loss": 0.9153, + "step": 679 + }, + { + "epoch": 0.84, + "learning_rate": 1.6909251832815005e-05, + "loss": 0.9573, + "step": 680 + }, + { + "epoch": 0.84, + "learning_rate": 1.6899622239919965e-05, + "loss": 1.0043, + "step": 681 + }, + { + "epoch": 0.84, + "learning_rate": 1.6889980420557674e-05, + "loss": 0.9114, + "step": 682 + }, + { + "epoch": 0.84, + "learning_rate": 1.6880326391813917e-05, + "loss": 0.957, + "step": 683 + }, + { + "epoch": 0.84, + "learning_rate": 1.6870660170796094e-05, + "loss": 0.9858, + "step": 684 + }, + { + "epoch": 0.84, + "learning_rate": 1.6860981774633228e-05, + "loss": 1.0881, + "step": 685 + }, + { + "epoch": 0.85, + "learning_rate": 1.6851291220475908e-05, + "loss": 1.0752, + "step": 686 + }, + { + "epoch": 0.85, + "learning_rate": 1.6841588525496268e-05, + "loss": 1.0071, + "step": 687 + }, + { + "epoch": 0.85, + "learning_rate": 1.683187370688795e-05, + "loss": 0.9215, + "step": 688 + }, + { + "epoch": 0.85, + "learning_rate": 1.6822146781866097e-05, + "loss": 0.8374, + "step": 689 + }, + { + "epoch": 0.85, + "learning_rate": 1.6812407767667293e-05, + "loss": 1.03, + "step": 690 + }, + { + "epoch": 0.85, + "learning_rate": 1.680265668154954e-05, + "loss": 1.0224, + "step": 691 + }, + { + "epoch": 0.85, + "learning_rate": 1.679289354079224e-05, + "loss": 0.957, + "step": 692 + }, + { + "epoch": 0.85, + "learning_rate": 1.6783118362696162e-05, + "loss": 1.0607, + "step": 693 + }, + { + "epoch": 0.86, + "learning_rate": 1.6773331164583393e-05, + "loss": 1.0057, + "step": 694 + }, + { + "epoch": 0.86, + "learning_rate": 1.6763531963797325e-05, + "loss": 1.0486, + "step": 695 + }, + { + "epoch": 0.86, + "learning_rate": 1.675372077770262e-05, + "loss": 1.103, + "step": 696 + }, + { + "epoch": 0.86, + "learning_rate": 1.6743897623685178e-05, + "loss": 1.0367, + "step": 697 + }, + { + "epoch": 0.86, + "learning_rate": 1.6734062519152113e-05, + "loss": 1.1671, + "step": 698 + }, + { + "epoch": 0.86, + "learning_rate": 1.6724215481531704e-05, + "loss": 1.0899, + "step": 699 + }, + { + "epoch": 0.86, + "learning_rate": 1.6714356528273382e-05, + "loss": 0.9339, + "step": 700 + }, + { + "epoch": 0.86, + "learning_rate": 1.6704485676847695e-05, + "loss": 1.0635, + "step": 701 + }, + { + "epoch": 0.86, + "learning_rate": 1.6694602944746275e-05, + "loss": 0.9791, + "step": 702 + }, + { + "epoch": 0.87, + "learning_rate": 1.6684708349481808e-05, + "loss": 1.0137, + "step": 703 + }, + { + "epoch": 0.87, + "learning_rate": 1.6674801908587988e-05, + "loss": 0.9719, + "step": 704 + }, + { + "epoch": 0.87, + "learning_rate": 1.666488363961952e-05, + "loss": 1.0617, + "step": 705 + }, + { + "epoch": 0.87, + "learning_rate": 1.6654953560152063e-05, + "loss": 0.9899, + "step": 706 + }, + { + "epoch": 0.87, + "learning_rate": 1.6645011687782196e-05, + "loss": 1.027, + "step": 707 + }, + { + "epoch": 0.87, + "learning_rate": 1.6635058040127408e-05, + "loss": 1.0079, + "step": 708 + }, + { + "epoch": 0.87, + "learning_rate": 1.662509263482604e-05, + "loss": 0.9832, + "step": 709 + }, + { + "epoch": 0.87, + "learning_rate": 1.6615115489537285e-05, + "loss": 0.9349, + "step": 710 + }, + { + "epoch": 0.88, + "learning_rate": 1.6605126621941127e-05, + "loss": 0.9694, + "step": 711 + }, + { + "epoch": 0.88, + "learning_rate": 1.6595126049738328e-05, + "loss": 0.9241, + "step": 712 + }, + { + "epoch": 0.88, + "learning_rate": 1.658511379065039e-05, + "loss": 0.9774, + "step": 713 + }, + { + "epoch": 0.88, + "learning_rate": 1.657508986241952e-05, + "loss": 0.9507, + "step": 714 + }, + { + "epoch": 0.88, + "learning_rate": 1.6565054282808617e-05, + "loss": 1.0445, + "step": 715 + }, + { + "epoch": 0.88, + "learning_rate": 1.6555007069601208e-05, + "loss": 1.0163, + "step": 716 + }, + { + "epoch": 0.88, + "learning_rate": 1.6544948240601453e-05, + "loss": 0.9039, + "step": 717 + }, + { + "epoch": 0.88, + "learning_rate": 1.653487781363408e-05, + "loss": 1.0291, + "step": 718 + }, + { + "epoch": 0.89, + "learning_rate": 1.6524795806544384e-05, + "loss": 1.0336, + "step": 719 + }, + { + "epoch": 0.89, + "learning_rate": 1.6514702237198172e-05, + "loss": 1.1045, + "step": 720 + }, + { + "epoch": 0.89, + "learning_rate": 1.6504597123481737e-05, + "loss": 1.0446, + "step": 721 + }, + { + "epoch": 0.89, + "learning_rate": 1.6494480483301836e-05, + "loss": 0.9759, + "step": 722 + }, + { + "epoch": 0.89, + "learning_rate": 1.6484352334585654e-05, + "loss": 1.0232, + "step": 723 + }, + { + "epoch": 0.89, + "learning_rate": 1.6474212695280756e-05, + "loss": 1.0596, + "step": 724 + }, + { + "epoch": 0.89, + "learning_rate": 1.6464061583355088e-05, + "loss": 1.0333, + "step": 725 + }, + { + "epoch": 0.89, + "learning_rate": 1.6453899016796903e-05, + "loss": 1.0234, + "step": 726 + }, + { + "epoch": 0.9, + "learning_rate": 1.6443725013614772e-05, + "loss": 0.9545, + "step": 727 + }, + { + "epoch": 0.9, + "learning_rate": 1.6433539591837527e-05, + "loss": 1.0994, + "step": 728 + }, + { + "epoch": 0.9, + "learning_rate": 1.6423342769514227e-05, + "loss": 0.9335, + "step": 729 + }, + { + "epoch": 0.9, + "learning_rate": 1.6413134564714142e-05, + "loss": 1.0766, + "step": 730 + }, + { + "epoch": 0.9, + "learning_rate": 1.640291499552671e-05, + "loss": 1.0006, + "step": 731 + }, + { + "epoch": 0.9, + "learning_rate": 1.6392684080061503e-05, + "loss": 1.1148, + "step": 732 + }, + { + "epoch": 0.9, + "learning_rate": 1.6382441836448203e-05, + "loss": 0.9362, + "step": 733 + }, + { + "epoch": 0.9, + "learning_rate": 1.637218828283657e-05, + "loss": 0.9155, + "step": 734 + }, + { + "epoch": 0.91, + "learning_rate": 1.636192343739639e-05, + "loss": 0.9952, + "step": 735 + }, + { + "epoch": 0.91, + "learning_rate": 1.635164731831748e-05, + "loss": 0.9724, + "step": 736 + }, + { + "epoch": 0.91, + "learning_rate": 1.6341359943809626e-05, + "loss": 1.0119, + "step": 737 + }, + { + "epoch": 0.91, + "learning_rate": 1.633106133210255e-05, + "loss": 0.8702, + "step": 738 + }, + { + "epoch": 0.91, + "learning_rate": 1.63207515014459e-05, + "loss": 1.0013, + "step": 739 + }, + { + "epoch": 0.91, + "learning_rate": 1.6310430470109196e-05, + "loss": 1.0206, + "step": 740 + }, + { + "epoch": 0.91, + "learning_rate": 1.6300098256381807e-05, + "loss": 1.0876, + "step": 741 + }, + { + "epoch": 0.91, + "learning_rate": 1.628975487857293e-05, + "loss": 1.0504, + "step": 742 + }, + { + "epoch": 0.92, + "learning_rate": 1.627940035501152e-05, + "loss": 0.9877, + "step": 743 + }, + { + "epoch": 0.92, + "learning_rate": 1.626903470404631e-05, + "loss": 1.0494, + "step": 744 + }, + { + "epoch": 0.92, + "learning_rate": 1.625865794404573e-05, + "loss": 0.9823, + "step": 745 + }, + { + "epoch": 0.92, + "learning_rate": 1.6248270093397915e-05, + "loss": 0.8848, + "step": 746 + }, + { + "epoch": 0.92, + "learning_rate": 1.6237871170510636e-05, + "loss": 1.0968, + "step": 747 + }, + { + "epoch": 0.92, + "learning_rate": 1.62274611938113e-05, + "loss": 0.901, + "step": 748 + }, + { + "epoch": 0.92, + "learning_rate": 1.621704018174688e-05, + "loss": 1.039, + "step": 749 + }, + { + "epoch": 0.92, + "learning_rate": 1.6206608152783924e-05, + "loss": 1.0096, + "step": 750 + }, + { + "epoch": 0.93, + "learning_rate": 1.6196165125408507e-05, + "loss": 0.9397, + "step": 751 + }, + { + "epoch": 0.93, + "learning_rate": 1.6185711118126164e-05, + "loss": 1.0388, + "step": 752 + }, + { + "epoch": 0.93, + "learning_rate": 1.617524614946192e-05, + "loss": 1.0634, + "step": 753 + }, + { + "epoch": 0.93, + "learning_rate": 1.6164770237960204e-05, + "loss": 1.0586, + "step": 754 + }, + { + "epoch": 0.93, + "learning_rate": 1.6154283402184846e-05, + "loss": 1.0547, + "step": 755 + }, + { + "epoch": 0.93, + "learning_rate": 1.614378566071903e-05, + "loss": 1.0706, + "step": 756 + }, + { + "epoch": 0.93, + "learning_rate": 1.6133277032165264e-05, + "loss": 0.9817, + "step": 757 + }, + { + "epoch": 0.93, + "learning_rate": 1.6122757535145346e-05, + "loss": 1.0162, + "step": 758 + }, + { + "epoch": 0.94, + "learning_rate": 1.611222718830035e-05, + "loss": 0.9974, + "step": 759 + }, + { + "epoch": 0.94, + "learning_rate": 1.6101686010290556e-05, + "loss": 1.0385, + "step": 760 + }, + { + "epoch": 0.94, + "learning_rate": 1.6091134019795447e-05, + "loss": 0.9456, + "step": 761 + }, + { + "epoch": 0.94, + "learning_rate": 1.6080571235513666e-05, + "loss": 1.0276, + "step": 762 + }, + { + "epoch": 0.94, + "learning_rate": 1.606999767616298e-05, + "loss": 0.9206, + "step": 763 + }, + { + "epoch": 0.94, + "learning_rate": 1.605941336048025e-05, + "loss": 0.9756, + "step": 764 + }, + { + "epoch": 0.94, + "learning_rate": 1.604881830722141e-05, + "loss": 0.9648, + "step": 765 + }, + { + "epoch": 0.94, + "learning_rate": 1.60382125351614e-05, + "loss": 1.0213, + "step": 766 + }, + { + "epoch": 0.95, + "learning_rate": 1.6027596063094174e-05, + "loss": 1.0014, + "step": 767 + }, + { + "epoch": 0.95, + "learning_rate": 1.6016968909832632e-05, + "loss": 1.1058, + "step": 768 + }, + { + "epoch": 0.95, + "learning_rate": 1.600633109420861e-05, + "loss": 0.9459, + "step": 769 + }, + { + "epoch": 0.95, + "learning_rate": 1.5995682635072843e-05, + "loss": 1.09, + "step": 770 + }, + { + "epoch": 0.95, + "learning_rate": 1.5985023551294907e-05, + "loss": 1.1158, + "step": 771 + }, + { + "epoch": 0.95, + "learning_rate": 1.597435386176323e-05, + "loss": 0.9966, + "step": 772 + }, + { + "epoch": 0.95, + "learning_rate": 1.5963673585385016e-05, + "loss": 0.9845, + "step": 773 + }, + { + "epoch": 0.95, + "learning_rate": 1.5952982741086238e-05, + "loss": 0.9748, + "step": 774 + }, + { + "epoch": 0.95, + "learning_rate": 1.5942281347811596e-05, + "loss": 1.0895, + "step": 775 + }, + { + "epoch": 0.96, + "learning_rate": 1.5931569424524477e-05, + "loss": 1.1871, + "step": 776 + }, + { + "epoch": 0.96, + "learning_rate": 1.5920846990206934e-05, + "loss": 0.986, + "step": 777 + }, + { + "epoch": 0.96, + "learning_rate": 1.591011406385964e-05, + "loss": 1.052, + "step": 778 + }, + { + "epoch": 0.96, + "learning_rate": 1.589937066450187e-05, + "loss": 0.9269, + "step": 779 + }, + { + "epoch": 0.96, + "learning_rate": 1.5888616811171452e-05, + "loss": 1.0525, + "step": 780 + }, + { + "epoch": 0.96, + "learning_rate": 1.5877852522924733e-05, + "loss": 1.1237, + "step": 781 + }, + { + "epoch": 0.96, + "learning_rate": 1.586707781883656e-05, + "loss": 1.042, + "step": 782 + }, + { + "epoch": 0.96, + "learning_rate": 1.5856292718000235e-05, + "loss": 0.9764, + "step": 783 + }, + { + "epoch": 0.97, + "learning_rate": 1.584549723952748e-05, + "loss": 0.9498, + "step": 784 + }, + { + "epoch": 0.97, + "learning_rate": 1.5834691402548415e-05, + "loss": 0.9808, + "step": 785 + }, + { + "epoch": 0.97, + "learning_rate": 1.5823875226211507e-05, + "loss": 1.0024, + "step": 786 + }, + { + "epoch": 0.97, + "learning_rate": 1.5813048729683543e-05, + "loss": 1.0588, + "step": 787 + }, + { + "epoch": 0.97, + "learning_rate": 1.5802211932149614e-05, + "loss": 0.8983, + "step": 788 + }, + { + "epoch": 0.97, + "learning_rate": 1.5791364852813047e-05, + "loss": 1.0963, + "step": 789 + }, + { + "epoch": 0.97, + "learning_rate": 1.5780507510895398e-05, + "loss": 0.9529, + "step": 790 + }, + { + "epoch": 0.97, + "learning_rate": 1.5769639925636404e-05, + "loss": 0.9525, + "step": 791 + }, + { + "epoch": 0.98, + "learning_rate": 1.575876211629396e-05, + "loss": 0.9296, + "step": 792 + }, + { + "epoch": 0.98, + "learning_rate": 1.5747874102144073e-05, + "loss": 0.9804, + "step": 793 + }, + { + "epoch": 0.98, + "learning_rate": 1.5736975902480832e-05, + "loss": 1.0092, + "step": 794 + }, + { + "epoch": 0.98, + "learning_rate": 1.5726067536616383e-05, + "loss": 0.9943, + "step": 795 + }, + { + "epoch": 0.98, + "learning_rate": 1.571514902388088e-05, + "loss": 0.9165, + "step": 796 + }, + { + "epoch": 0.98, + "learning_rate": 1.5704220383622464e-05, + "loss": 0.9893, + "step": 797 + }, + { + "epoch": 0.98, + "learning_rate": 1.5693281635207214e-05, + "loss": 0.9976, + "step": 798 + }, + { + "epoch": 0.98, + "learning_rate": 1.5682332798019137e-05, + "loss": 1.0344, + "step": 799 + }, + { + "epoch": 0.99, + "learning_rate": 1.567137389146009e-05, + "loss": 1.0415, + "step": 800 + }, + { + "epoch": 0.99, + "learning_rate": 1.5660404934949798e-05, + "loss": 1.0315, + "step": 801 + }, + { + "epoch": 0.99, + "learning_rate": 1.564942594792579e-05, + "loss": 0.9606, + "step": 802 + }, + { + "epoch": 0.99, + "learning_rate": 1.563843694984336e-05, + "loss": 0.9618, + "step": 803 + }, + { + "epoch": 0.99, + "learning_rate": 1.5627437960175556e-05, + "loss": 1.0324, + "step": 804 + }, + { + "epoch": 0.99, + "learning_rate": 1.5616428998413122e-05, + "loss": 0.9187, + "step": 805 + }, + { + "epoch": 0.99, + "learning_rate": 1.5605410084064468e-05, + "loss": 1.0448, + "step": 806 + }, + { + "epoch": 0.99, + "learning_rate": 1.5594381236655665e-05, + "loss": 1.0227, + "step": 807 + }, + { + "epoch": 1.0, + "learning_rate": 1.558334247573035e-05, + "loss": 1.0774, + "step": 808 + }, + { + "epoch": 1.0, + "learning_rate": 1.5572293820849754e-05, + "loss": 1.0876, + "step": 809 + }, + { + "epoch": 1.0, + "learning_rate": 1.5561235291592635e-05, + "loss": 0.9874, + "step": 810 + }, + { + "epoch": 1.0, + "learning_rate": 1.5550166907555243e-05, + "loss": 1.0704, + "step": 811 + }, + { + "epoch": 1.0, + "learning_rate": 1.5539088688351295e-05, + "loss": 1.1851, + "step": 812 + }, + { + "epoch": 1.0, + "learning_rate": 1.5528000653611935e-05, + "loss": 0.6687, + "step": 813 + }, + { + "epoch": 1.0, + "learning_rate": 1.55169028229857e-05, + "loss": 0.6852, + "step": 814 + }, + { + "epoch": 1.0, + "learning_rate": 1.5505795216138498e-05, + "loss": 0.5976, + "step": 815 + }, + { + "epoch": 1.01, + "learning_rate": 1.549467785275354e-05, + "loss": 0.5681, + "step": 816 + }, + { + "epoch": 1.01, + "learning_rate": 1.5483550752531337e-05, + "loss": 0.631, + "step": 817 + }, + { + "epoch": 1.01, + "learning_rate": 1.5472413935189656e-05, + "loss": 0.6254, + "step": 818 + }, + { + "epoch": 1.01, + "learning_rate": 1.546126742046348e-05, + "loss": 0.5024, + "step": 819 + }, + { + "epoch": 1.01, + "learning_rate": 1.5450111228104976e-05, + "loss": 0.5781, + "step": 820 + }, + { + "epoch": 1.01, + "learning_rate": 1.5438945377883463e-05, + "loss": 0.6256, + "step": 821 + }, + { + "epoch": 1.01, + "learning_rate": 1.542776988958537e-05, + "loss": 0.5193, + "step": 822 + }, + { + "epoch": 1.01, + "learning_rate": 1.541658478301421e-05, + "loss": 0.5569, + "step": 823 + }, + { + "epoch": 1.02, + "learning_rate": 1.5405390077990538e-05, + "loss": 0.5445, + "step": 824 + }, + { + "epoch": 1.02, + "learning_rate": 1.5394185794351914e-05, + "loss": 0.5204, + "step": 825 + }, + { + "epoch": 1.02, + "learning_rate": 1.5382971951952878e-05, + "loss": 0.5262, + "step": 826 + }, + { + "epoch": 1.02, + "learning_rate": 1.5371748570664906e-05, + "loss": 0.5566, + "step": 827 + }, + { + "epoch": 1.02, + "learning_rate": 1.5360515670376373e-05, + "loss": 0.5698, + "step": 828 + }, + { + "epoch": 1.02, + "learning_rate": 1.5349273270992537e-05, + "loss": 0.5174, + "step": 829 + }, + { + "epoch": 1.02, + "learning_rate": 1.5338021392435462e-05, + "loss": 0.4506, + "step": 830 + }, + { + "epoch": 1.02, + "learning_rate": 1.5326760054644045e-05, + "loss": 0.5495, + "step": 831 + }, + { + "epoch": 1.03, + "learning_rate": 1.5315489277573906e-05, + "loss": 0.5458, + "step": 832 + }, + { + "epoch": 1.03, + "learning_rate": 1.5304209081197425e-05, + "loss": 0.6553, + "step": 833 + }, + { + "epoch": 1.03, + "learning_rate": 1.5292919485503662e-05, + "loss": 0.611, + "step": 834 + }, + { + "epoch": 1.03, + "learning_rate": 1.5281620510498322e-05, + "loss": 0.5755, + "step": 835 + }, + { + "epoch": 1.03, + "learning_rate": 1.5270312176203742e-05, + "loss": 0.5727, + "step": 836 + }, + { + "epoch": 1.03, + "learning_rate": 1.5258994502658846e-05, + "loss": 0.5053, + "step": 837 + }, + { + "epoch": 1.03, + "learning_rate": 1.5247667509919104e-05, + "loss": 0.5938, + "step": 838 + }, + { + "epoch": 1.03, + "learning_rate": 1.52363312180565e-05, + "loss": 0.7924, + "step": 839 + }, + { + "epoch": 1.04, + "learning_rate": 1.5224985647159489e-05, + "loss": 0.5243, + "step": 840 + }, + { + "epoch": 1.04, + "learning_rate": 1.5213630817332985e-05, + "loss": 0.5438, + "step": 841 + }, + { + "epoch": 1.04, + "learning_rate": 1.5202266748698298e-05, + "loss": 0.5526, + "step": 842 + }, + { + "epoch": 1.04, + "learning_rate": 1.5190893461393108e-05, + "loss": 0.6025, + "step": 843 + }, + { + "epoch": 1.04, + "learning_rate": 1.517951097557144e-05, + "loss": 0.561, + "step": 844 + }, + { + "epoch": 1.04, + "learning_rate": 1.5168119311403611e-05, + "loss": 0.5186, + "step": 845 + }, + { + "epoch": 1.04, + "learning_rate": 1.5156718489076208e-05, + "loss": 0.5673, + "step": 846 + }, + { + "epoch": 1.04, + "learning_rate": 1.5145308528792045e-05, + "loss": 0.6513, + "step": 847 + }, + { + "epoch": 1.05, + "learning_rate": 1.5133889450770122e-05, + "loss": 0.5702, + "step": 848 + }, + { + "epoch": 1.05, + "learning_rate": 1.512246127524561e-05, + "loss": 0.5445, + "step": 849 + }, + { + "epoch": 1.05, + "learning_rate": 1.511102402246979e-05, + "loss": 0.6269, + "step": 850 + }, + { + "epoch": 1.05, + "learning_rate": 1.5099577712710036e-05, + "loss": 0.4926, + "step": 851 + }, + { + "epoch": 1.05, + "learning_rate": 1.508812236624976e-05, + "loss": 0.5888, + "step": 852 + }, + { + "epoch": 1.05, + "learning_rate": 1.50766580033884e-05, + "loss": 0.4709, + "step": 853 + }, + { + "epoch": 1.05, + "learning_rate": 1.506518464444137e-05, + "loss": 0.5209, + "step": 854 + }, + { + "epoch": 1.05, + "learning_rate": 1.505370230974001e-05, + "loss": 0.4759, + "step": 855 + }, + { + "epoch": 1.06, + "learning_rate": 1.5042211019631588e-05, + "loss": 0.6702, + "step": 856 + }, + { + "epoch": 1.06, + "learning_rate": 1.5030710794479226e-05, + "loss": 0.5046, + "step": 857 + }, + { + "epoch": 1.06, + "learning_rate": 1.5019201654661886e-05, + "loss": 0.5031, + "step": 858 + }, + { + "epoch": 1.06, + "learning_rate": 1.5007683620574322e-05, + "loss": 0.6509, + "step": 859 + }, + { + "epoch": 1.06, + "learning_rate": 1.4996156712627059e-05, + "loss": 0.5351, + "step": 860 + }, + { + "epoch": 1.06, + "learning_rate": 1.4984620951246333e-05, + "loss": 0.5281, + "step": 861 + }, + { + "epoch": 1.06, + "learning_rate": 1.4973076356874081e-05, + "loss": 0.5847, + "step": 862 + }, + { + "epoch": 1.06, + "learning_rate": 1.4961522949967887e-05, + "loss": 0.4243, + "step": 863 + }, + { + "epoch": 1.07, + "learning_rate": 1.4949960751000944e-05, + "loss": 0.5879, + "step": 864 + }, + { + "epoch": 1.07, + "learning_rate": 1.4938389780462044e-05, + "loss": 0.5711, + "step": 865 + }, + { + "epoch": 1.07, + "learning_rate": 1.4926810058855508e-05, + "loss": 0.5433, + "step": 866 + }, + { + "epoch": 1.07, + "learning_rate": 1.4915221606701162e-05, + "loss": 0.5825, + "step": 867 + }, + { + "epoch": 1.07, + "learning_rate": 1.4903624444534317e-05, + "loss": 0.5057, + "step": 868 + }, + { + "epoch": 1.07, + "learning_rate": 1.4892018592905702e-05, + "loss": 0.5675, + "step": 869 + }, + { + "epoch": 1.07, + "learning_rate": 1.488040407238146e-05, + "loss": 0.4703, + "step": 870 + }, + { + "epoch": 1.07, + "learning_rate": 1.486878090354308e-05, + "loss": 0.5622, + "step": 871 + }, + { + "epoch": 1.08, + "learning_rate": 1.4857149106987393e-05, + "loss": 0.5979, + "step": 872 + }, + { + "epoch": 1.08, + "learning_rate": 1.4845508703326504e-05, + "loss": 0.5317, + "step": 873 + }, + { + "epoch": 1.08, + "learning_rate": 1.4833859713187777e-05, + "loss": 0.5876, + "step": 874 + }, + { + "epoch": 1.08, + "learning_rate": 1.482220215721379e-05, + "loss": 0.4549, + "step": 875 + }, + { + "epoch": 1.08, + "learning_rate": 1.4810536056062307e-05, + "loss": 0.5158, + "step": 876 + }, + { + "epoch": 1.08, + "learning_rate": 1.4798861430406221e-05, + "loss": 0.6084, + "step": 877 + }, + { + "epoch": 1.08, + "learning_rate": 1.4787178300933543e-05, + "loss": 0.5854, + "step": 878 + }, + { + "epoch": 1.08, + "learning_rate": 1.4775486688347346e-05, + "loss": 0.5037, + "step": 879 + }, + { + "epoch": 1.09, + "learning_rate": 1.476378661336574e-05, + "loss": 0.4829, + "step": 880 + }, + { + "epoch": 1.09, + "learning_rate": 1.4752078096721827e-05, + "loss": 0.6024, + "step": 881 + }, + { + "epoch": 1.09, + "learning_rate": 1.4740361159163668e-05, + "loss": 0.597, + "step": 882 + }, + { + "epoch": 1.09, + "learning_rate": 1.4728635821454255e-05, + "loss": 0.5802, + "step": 883 + }, + { + "epoch": 1.09, + "learning_rate": 1.4716902104371449e-05, + "loss": 0.5768, + "step": 884 + }, + { + "epoch": 1.09, + "learning_rate": 1.4705160028707976e-05, + "loss": 0.4529, + "step": 885 + }, + { + "epoch": 1.09, + "learning_rate": 1.4693409615271365e-05, + "loss": 0.6421, + "step": 886 + }, + { + "epoch": 1.09, + "learning_rate": 1.4681650884883923e-05, + "loss": 0.4661, + "step": 887 + }, + { + "epoch": 1.09, + "learning_rate": 1.4669883858382689e-05, + "loss": 0.615, + "step": 888 + }, + { + "epoch": 1.1, + "learning_rate": 1.4658108556619417e-05, + "loss": 0.4985, + "step": 889 + }, + { + "epoch": 1.1, + "learning_rate": 1.4646325000460509e-05, + "loss": 0.5652, + "step": 890 + }, + { + "epoch": 1.1, + "learning_rate": 1.4634533210787006e-05, + "loss": 0.4635, + "step": 891 + }, + { + "epoch": 1.1, + "learning_rate": 1.4622733208494526e-05, + "loss": 0.5324, + "step": 892 + }, + { + "epoch": 1.1, + "learning_rate": 1.461092501449326e-05, + "loss": 0.5913, + "step": 893 + }, + { + "epoch": 1.1, + "learning_rate": 1.4599108649707899e-05, + "loss": 0.5431, + "step": 894 + }, + { + "epoch": 1.1, + "learning_rate": 1.4587284135077614e-05, + "loss": 0.5692, + "step": 895 + }, + { + "epoch": 1.1, + "learning_rate": 1.4575451491556027e-05, + "loss": 0.6694, + "step": 896 + }, + { + "epoch": 1.11, + "learning_rate": 1.4563610740111163e-05, + "loss": 0.5441, + "step": 897 + }, + { + "epoch": 1.11, + "learning_rate": 1.4551761901725402e-05, + "loss": 0.599, + "step": 898 + }, + { + "epoch": 1.11, + "learning_rate": 1.4539904997395468e-05, + "loss": 0.5007, + "step": 899 + }, + { + "epoch": 1.11, + "learning_rate": 1.4528040048132376e-05, + "loss": 0.5522, + "step": 900 + }, + { + "epoch": 1.11, + "learning_rate": 1.4516167074961394e-05, + "loss": 0.5332, + "step": 901 + }, + { + "epoch": 1.11, + "learning_rate": 1.450428609892201e-05, + "loss": 0.6242, + "step": 902 + }, + { + "epoch": 1.11, + "learning_rate": 1.4492397141067888e-05, + "loss": 0.575, + "step": 903 + }, + { + "epoch": 1.11, + "learning_rate": 1.4480500222466849e-05, + "loss": 0.5436, + "step": 904 + }, + { + "epoch": 1.12, + "learning_rate": 1.4468595364200808e-05, + "loss": 0.5151, + "step": 905 + }, + { + "epoch": 1.12, + "learning_rate": 1.4456682587365759e-05, + "loss": 0.5247, + "step": 906 + }, + { + "epoch": 1.12, + "learning_rate": 1.4444761913071721e-05, + "loss": 0.6823, + "step": 907 + }, + { + "epoch": 1.12, + "learning_rate": 1.4432833362442708e-05, + "loss": 0.6233, + "step": 908 + }, + { + "epoch": 1.12, + "learning_rate": 1.4420896956616698e-05, + "loss": 0.5607, + "step": 909 + }, + { + "epoch": 1.12, + "learning_rate": 1.4408952716745583e-05, + "loss": 0.5813, + "step": 910 + }, + { + "epoch": 1.12, + "learning_rate": 1.4397000663995139e-05, + "loss": 0.606, + "step": 911 + }, + { + "epoch": 1.12, + "learning_rate": 1.4385040819544988e-05, + "loss": 0.5589, + "step": 912 + }, + { + "epoch": 1.13, + "learning_rate": 1.4373073204588556e-05, + "loss": 0.5539, + "step": 913 + }, + { + "epoch": 1.13, + "learning_rate": 1.4361097840333037e-05, + "loss": 0.5728, + "step": 914 + }, + { + "epoch": 1.13, + "learning_rate": 1.434911474799937e-05, + "loss": 0.5567, + "step": 915 + }, + { + "epoch": 1.13, + "learning_rate": 1.4337123948822172e-05, + "loss": 0.5565, + "step": 916 + }, + { + "epoch": 1.13, + "learning_rate": 1.4325125464049725e-05, + "loss": 0.6242, + "step": 917 + }, + { + "epoch": 1.13, + "learning_rate": 1.4313119314943933e-05, + "loss": 0.5333, + "step": 918 + }, + { + "epoch": 1.13, + "learning_rate": 1.4301105522780275e-05, + "loss": 0.51, + "step": 919 + }, + { + "epoch": 1.13, + "learning_rate": 1.4289084108847777e-05, + "loss": 0.6843, + "step": 920 + }, + { + "epoch": 1.14, + "learning_rate": 1.427705509444897e-05, + "loss": 0.5723, + "step": 921 + }, + { + "epoch": 1.14, + "learning_rate": 1.4265018500899856e-05, + "loss": 0.5378, + "step": 922 + }, + { + "epoch": 1.14, + "learning_rate": 1.4252974349529871e-05, + "loss": 0.6137, + "step": 923 + }, + { + "epoch": 1.14, + "learning_rate": 1.4240922661681826e-05, + "loss": 0.6309, + "step": 924 + }, + { + "epoch": 1.14, + "learning_rate": 1.4228863458711915e-05, + "loss": 0.5622, + "step": 925 + }, + { + "epoch": 1.14, + "learning_rate": 1.4216796761989621e-05, + "loss": 0.6125, + "step": 926 + }, + { + "epoch": 1.14, + "learning_rate": 1.4204722592897728e-05, + "loss": 0.4548, + "step": 927 + }, + { + "epoch": 1.14, + "learning_rate": 1.419264097283225e-05, + "loss": 0.4341, + "step": 928 + }, + { + "epoch": 1.15, + "learning_rate": 1.4180551923202406e-05, + "loss": 0.5962, + "step": 929 + }, + { + "epoch": 1.15, + "learning_rate": 1.4168455465430585e-05, + "loss": 0.6933, + "step": 930 + }, + { + "epoch": 1.15, + "learning_rate": 1.4156351620952293e-05, + "loss": 0.5052, + "step": 931 + }, + { + "epoch": 1.15, + "learning_rate": 1.4144240411216144e-05, + "loss": 0.6242, + "step": 932 + }, + { + "epoch": 1.15, + "learning_rate": 1.4132121857683782e-05, + "loss": 0.609, + "step": 933 + }, + { + "epoch": 1.15, + "learning_rate": 1.4119995981829884e-05, + "loss": 0.5992, + "step": 934 + }, + { + "epoch": 1.15, + "learning_rate": 1.4107862805142084e-05, + "loss": 0.4893, + "step": 935 + }, + { + "epoch": 1.15, + "learning_rate": 1.4095722349120977e-05, + "loss": 0.56, + "step": 936 + }, + { + "epoch": 1.16, + "learning_rate": 1.4083574635280029e-05, + "loss": 0.5354, + "step": 937 + }, + { + "epoch": 1.16, + "learning_rate": 1.4071419685145587e-05, + "loss": 0.7124, + "step": 938 + }, + { + "epoch": 1.16, + "learning_rate": 1.405925752025682e-05, + "loss": 0.5729, + "step": 939 + }, + { + "epoch": 1.16, + "learning_rate": 1.4047088162165673e-05, + "loss": 0.5088, + "step": 940 + }, + { + "epoch": 1.16, + "learning_rate": 1.403491163243684e-05, + "loss": 0.5273, + "step": 941 + }, + { + "epoch": 1.16, + "learning_rate": 1.402272795264773e-05, + "loss": 0.6047, + "step": 942 + }, + { + "epoch": 1.16, + "learning_rate": 1.4010537144388416e-05, + "loss": 0.5869, + "step": 943 + }, + { + "epoch": 1.16, + "learning_rate": 1.399833922926161e-05, + "loss": 0.5757, + "step": 944 + }, + { + "epoch": 1.17, + "learning_rate": 1.3986134228882607e-05, + "loss": 0.5671, + "step": 945 + }, + { + "epoch": 1.17, + "learning_rate": 1.3973922164879263e-05, + "loss": 0.4423, + "step": 946 + }, + { + "epoch": 1.17, + "learning_rate": 1.3961703058891955e-05, + "loss": 0.6312, + "step": 947 + }, + { + "epoch": 1.17, + "learning_rate": 1.3949476932573531e-05, + "loss": 0.6056, + "step": 948 + }, + { + "epoch": 1.17, + "learning_rate": 1.3937243807589291e-05, + "loss": 0.5232, + "step": 949 + }, + { + "epoch": 1.17, + "learning_rate": 1.3925003705616917e-05, + "loss": 0.524, + "step": 950 + }, + { + "epoch": 1.17, + "learning_rate": 1.3912756648346477e-05, + "loss": 0.4864, + "step": 951 + }, + { + "epoch": 1.17, + "learning_rate": 1.3900502657480352e-05, + "loss": 0.553, + "step": 952 + }, + { + "epoch": 1.17, + "learning_rate": 1.388824175473321e-05, + "loss": 0.5696, + "step": 953 + }, + { + "epoch": 1.18, + "learning_rate": 1.3875973961831965e-05, + "loss": 0.5393, + "step": 954 + }, + { + "epoch": 1.18, + "learning_rate": 1.3863699300515754e-05, + "loss": 0.5701, + "step": 955 + }, + { + "epoch": 1.18, + "learning_rate": 1.3851417792535866e-05, + "loss": 0.5353, + "step": 956 + }, + { + "epoch": 1.18, + "learning_rate": 1.383912945965574e-05, + "loss": 0.647, + "step": 957 + }, + { + "epoch": 1.18, + "learning_rate": 1.3826834323650899e-05, + "loss": 0.5723, + "step": 958 + }, + { + "epoch": 1.18, + "learning_rate": 1.3814532406308922e-05, + "loss": 0.5151, + "step": 959 + }, + { + "epoch": 1.18, + "learning_rate": 1.380222372942941e-05, + "loss": 0.561, + "step": 960 + }, + { + "epoch": 1.18, + "learning_rate": 1.3789908314823932e-05, + "loss": 0.583, + "step": 961 + }, + { + "epoch": 1.19, + "learning_rate": 1.3777586184316016e-05, + "loss": 0.5595, + "step": 962 + }, + { + "epoch": 1.19, + "learning_rate": 1.3765257359741065e-05, + "loss": 0.5149, + "step": 963 + }, + { + "epoch": 1.19, + "learning_rate": 1.3752921862946364e-05, + "loss": 0.5862, + "step": 964 + }, + { + "epoch": 1.19, + "learning_rate": 1.3740579715791017e-05, + "loss": 0.5593, + "step": 965 + }, + { + "epoch": 1.19, + "learning_rate": 1.3728230940145911e-05, + "loss": 0.5631, + "step": 966 + }, + { + "epoch": 1.19, + "learning_rate": 1.371587555789367e-05, + "loss": 0.6061, + "step": 967 + }, + { + "epoch": 1.19, + "learning_rate": 1.3703513590928647e-05, + "loss": 0.5657, + "step": 968 + }, + { + "epoch": 1.19, + "learning_rate": 1.3691145061156843e-05, + "loss": 0.6639, + "step": 969 + }, + { + "epoch": 1.2, + "learning_rate": 1.3678769990495899e-05, + "loss": 0.6061, + "step": 970 + }, + { + "epoch": 1.2, + "learning_rate": 1.366638840087504e-05, + "loss": 0.6093, + "step": 971 + }, + { + "epoch": 1.2, + "learning_rate": 1.365400031423505e-05, + "loss": 0.6102, + "step": 972 + }, + { + "epoch": 1.2, + "learning_rate": 1.3641605752528225e-05, + "loss": 0.5884, + "step": 973 + }, + { + "epoch": 1.2, + "learning_rate": 1.3629204737718328e-05, + "loss": 0.5254, + "step": 974 + }, + { + "epoch": 1.2, + "learning_rate": 1.3616797291780563e-05, + "loss": 0.5393, + "step": 975 + }, + { + "epoch": 1.2, + "learning_rate": 1.3604383436701536e-05, + "loss": 0.5531, + "step": 976 + }, + { + "epoch": 1.2, + "learning_rate": 1.3591963194479198e-05, + "loss": 0.6008, + "step": 977 + }, + { + "epoch": 1.21, + "learning_rate": 1.3579536587122828e-05, + "loss": 0.5299, + "step": 978 + }, + { + "epoch": 1.21, + "learning_rate": 1.3567103636652976e-05, + "loss": 0.5843, + "step": 979 + }, + { + "epoch": 1.21, + "learning_rate": 1.3554664365101438e-05, + "loss": 0.5744, + "step": 980 + }, + { + "epoch": 1.21, + "learning_rate": 1.3542218794511212e-05, + "loss": 0.5031, + "step": 981 + }, + { + "epoch": 1.21, + "learning_rate": 1.3529766946936456e-05, + "loss": 0.542, + "step": 982 + }, + { + "epoch": 1.21, + "learning_rate": 1.351730884444245e-05, + "loss": 0.5276, + "step": 983 + }, + { + "epoch": 1.21, + "learning_rate": 1.3504844509105562e-05, + "loss": 0.5635, + "step": 984 + }, + { + "epoch": 1.21, + "learning_rate": 1.3492373963013199e-05, + "loss": 0.4981, + "step": 985 + }, + { + "epoch": 1.22, + "learning_rate": 1.3479897228263781e-05, + "loss": 0.6569, + "step": 986 + }, + { + "epoch": 1.22, + "learning_rate": 1.3467414326966685e-05, + "loss": 0.6171, + "step": 987 + }, + { + "epoch": 1.22, + "learning_rate": 1.3454925281242225e-05, + "loss": 0.4989, + "step": 988 + }, + { + "epoch": 1.22, + "learning_rate": 1.3442430113221602e-05, + "loss": 0.5932, + "step": 989 + }, + { + "epoch": 1.22, + "learning_rate": 1.342992884504686e-05, + "loss": 0.5803, + "step": 990 + }, + { + "epoch": 1.22, + "learning_rate": 1.3417421498870854e-05, + "loss": 0.5895, + "step": 991 + }, + { + "epoch": 1.22, + "learning_rate": 1.3404908096857216e-05, + "loss": 0.6557, + "step": 992 + }, + { + "epoch": 1.22, + "learning_rate": 1.3392388661180303e-05, + "loss": 0.5851, + "step": 993 + }, + { + "epoch": 1.23, + "learning_rate": 1.3379863214025169e-05, + "loss": 0.6023, + "step": 994 + }, + { + "epoch": 1.23, + "learning_rate": 1.3367331777587509e-05, + "loss": 0.5347, + "step": 995 + }, + { + "epoch": 1.23, + "learning_rate": 1.335479437407365e-05, + "loss": 0.6429, + "step": 996 + }, + { + "epoch": 1.23, + "learning_rate": 1.3342251025700474e-05, + "loss": 0.5198, + "step": 997 + }, + { + "epoch": 1.23, + "learning_rate": 1.3329701754695412e-05, + "loss": 0.5803, + "step": 998 + }, + { + "epoch": 1.23, + "learning_rate": 1.3317146583296385e-05, + "loss": 0.5882, + "step": 999 + }, + { + "epoch": 1.23, + "learning_rate": 1.3304585533751766e-05, + "loss": 0.5271, + "step": 1000 + }, + { + "epoch": 1.23, + "learning_rate": 1.3292018628320346e-05, + "loss": 0.5584, + "step": 1001 + }, + { + "epoch": 1.24, + "learning_rate": 1.32794458892713e-05, + "loss": 0.6215, + "step": 1002 + }, + { + "epoch": 1.24, + "learning_rate": 1.3266867338884131e-05, + "loss": 0.5837, + "step": 1003 + }, + { + "epoch": 1.24, + "learning_rate": 1.3254282999448647e-05, + "loss": 0.6337, + "step": 1004 + }, + { + "epoch": 1.24, + "learning_rate": 1.3241692893264909e-05, + "loss": 0.558, + "step": 1005 + }, + { + "epoch": 1.24, + "learning_rate": 1.32290970426432e-05, + "loss": 0.4728, + "step": 1006 + }, + { + "epoch": 1.24, + "learning_rate": 1.3216495469903983e-05, + "loss": 0.5645, + "step": 1007 + }, + { + "epoch": 1.24, + "learning_rate": 1.3203888197377857e-05, + "loss": 0.6095, + "step": 1008 + }, + { + "epoch": 1.24, + "learning_rate": 1.3191275247405525e-05, + "loss": 0.5947, + "step": 1009 + }, + { + "epoch": 1.25, + "learning_rate": 1.3178656642337755e-05, + "loss": 0.5517, + "step": 1010 + }, + { + "epoch": 1.25, + "learning_rate": 1.3166032404535326e-05, + "loss": 0.712, + "step": 1011 + }, + { + "epoch": 1.25, + "learning_rate": 1.3153402556369001e-05, + "loss": 0.6198, + "step": 1012 + }, + { + "epoch": 1.25, + "learning_rate": 1.314076712021949e-05, + "loss": 0.475, + "step": 1013 + }, + { + "epoch": 1.25, + "learning_rate": 1.3128126118477402e-05, + "loss": 0.5553, + "step": 1014 + }, + { + "epoch": 1.25, + "learning_rate": 1.3115479573543213e-05, + "loss": 0.5729, + "step": 1015 + }, + { + "epoch": 1.25, + "learning_rate": 1.3102827507827209e-05, + "loss": 0.5815, + "step": 1016 + }, + { + "epoch": 1.25, + "learning_rate": 1.3090169943749475e-05, + "loss": 0.5046, + "step": 1017 + }, + { + "epoch": 1.26, + "learning_rate": 1.3077506903739829e-05, + "loss": 0.5311, + "step": 1018 + }, + { + "epoch": 1.26, + "learning_rate": 1.3064838410237799e-05, + "loss": 0.6729, + "step": 1019 + }, + { + "epoch": 1.26, + "learning_rate": 1.305216448569257e-05, + "loss": 0.5671, + "step": 1020 + }, + { + "epoch": 1.26, + "learning_rate": 1.3039485152562951e-05, + "loss": 0.6124, + "step": 1021 + }, + { + "epoch": 1.26, + "learning_rate": 1.3026800433317348e-05, + "loss": 0.581, + "step": 1022 + }, + { + "epoch": 1.26, + "learning_rate": 1.30141103504337e-05, + "loss": 0.5247, + "step": 1023 + }, + { + "epoch": 1.26, + "learning_rate": 1.3001414926399447e-05, + "loss": 0.5531, + "step": 1024 + }, + { + "epoch": 1.26, + "learning_rate": 1.2988714183711504e-05, + "loss": 0.6003, + "step": 1025 + }, + { + "epoch": 1.26, + "learning_rate": 1.2976008144876211e-05, + "loss": 0.5571, + "step": 1026 + }, + { + "epoch": 1.27, + "learning_rate": 1.296329683240928e-05, + "loss": 0.6207, + "step": 1027 + }, + { + "epoch": 1.27, + "learning_rate": 1.2950580268835784e-05, + "loss": 0.6789, + "step": 1028 + }, + { + "epoch": 1.27, + "learning_rate": 1.2937858476690089e-05, + "loss": 0.5324, + "step": 1029 + }, + { + "epoch": 1.27, + "learning_rate": 1.2925131478515833e-05, + "loss": 0.5551, + "step": 1030 + }, + { + "epoch": 1.27, + "learning_rate": 1.291239929686588e-05, + "loss": 0.4953, + "step": 1031 + }, + { + "epoch": 1.27, + "learning_rate": 1.2899661954302277e-05, + "loss": 0.5855, + "step": 1032 + }, + { + "epoch": 1.27, + "learning_rate": 1.2886919473396212e-05, + "loss": 0.6074, + "step": 1033 + }, + { + "epoch": 1.27, + "learning_rate": 1.2874171876727988e-05, + "loss": 0.5353, + "step": 1034 + }, + { + "epoch": 1.28, + "learning_rate": 1.2861419186886963e-05, + "loss": 0.5915, + "step": 1035 + }, + { + "epoch": 1.28, + "learning_rate": 1.2848661426471532e-05, + "loss": 0.5486, + "step": 1036 + }, + { + "epoch": 1.28, + "learning_rate": 1.2835898618089064e-05, + "loss": 0.5106, + "step": 1037 + }, + { + "epoch": 1.28, + "learning_rate": 1.2823130784355882e-05, + "loss": 0.5592, + "step": 1038 + }, + { + "epoch": 1.28, + "learning_rate": 1.2810357947897205e-05, + "loss": 0.5262, + "step": 1039 + }, + { + "epoch": 1.28, + "learning_rate": 1.2797580131347127e-05, + "loss": 0.5883, + "step": 1040 + }, + { + "epoch": 1.28, + "learning_rate": 1.2784797357348562e-05, + "loss": 0.5447, + "step": 1041 + }, + { + "epoch": 1.28, + "learning_rate": 1.2772009648553208e-05, + "loss": 0.5459, + "step": 1042 + }, + { + "epoch": 1.29, + "learning_rate": 1.2759217027621507e-05, + "loss": 0.5576, + "step": 1043 + }, + { + "epoch": 1.29, + "learning_rate": 1.274641951722261e-05, + "loss": 0.5609, + "step": 1044 + }, + { + "epoch": 1.29, + "learning_rate": 1.2733617140034329e-05, + "loss": 0.5318, + "step": 1045 + }, + { + "epoch": 1.29, + "learning_rate": 1.2720809918743102e-05, + "loss": 0.6397, + "step": 1046 + }, + { + "epoch": 1.29, + "learning_rate": 1.2707997876043952e-05, + "loss": 0.5759, + "step": 1047 + }, + { + "epoch": 1.29, + "learning_rate": 1.2695181034640435e-05, + "loss": 0.5347, + "step": 1048 + }, + { + "epoch": 1.29, + "learning_rate": 1.268235941724463e-05, + "loss": 0.5401, + "step": 1049 + }, + { + "epoch": 1.29, + "learning_rate": 1.2669533046577063e-05, + "loss": 0.5342, + "step": 1050 + }, + { + "epoch": 1.3, + "learning_rate": 1.2656701945366689e-05, + "loss": 0.5112, + "step": 1051 + }, + { + "epoch": 1.3, + "learning_rate": 1.2643866136350847e-05, + "loss": 0.5222, + "step": 1052 + }, + { + "epoch": 1.3, + "learning_rate": 1.2631025642275212e-05, + "loss": 0.5332, + "step": 1053 + }, + { + "epoch": 1.3, + "learning_rate": 1.2618180485893775e-05, + "loss": 0.6541, + "step": 1054 + }, + { + "epoch": 1.3, + "learning_rate": 1.2605330689968771e-05, + "loss": 0.5035, + "step": 1055 + }, + { + "epoch": 1.3, + "learning_rate": 1.2592476277270671e-05, + "loss": 0.5342, + "step": 1056 + }, + { + "epoch": 1.3, + "learning_rate": 1.257961727057812e-05, + "loss": 0.581, + "step": 1057 + }, + { + "epoch": 1.3, + "learning_rate": 1.2566753692677902e-05, + "loss": 0.5817, + "step": 1058 + }, + { + "epoch": 1.31, + "learning_rate": 1.2553885566364907e-05, + "loss": 0.5856, + "step": 1059 + }, + { + "epoch": 1.31, + "learning_rate": 1.2541012914442088e-05, + "loss": 0.5536, + "step": 1060 + }, + { + "epoch": 1.31, + "learning_rate": 1.2528135759720403e-05, + "loss": 0.5319, + "step": 1061 + }, + { + "epoch": 1.31, + "learning_rate": 1.2515254125018803e-05, + "loss": 0.4743, + "step": 1062 + }, + { + "epoch": 1.31, + "learning_rate": 1.2502368033164176e-05, + "loss": 0.4541, + "step": 1063 + }, + { + "epoch": 1.31, + "learning_rate": 1.24894775069913e-05, + "loss": 0.5831, + "step": 1064 + }, + { + "epoch": 1.31, + "learning_rate": 1.2476582569342819e-05, + "loss": 0.5395, + "step": 1065 + }, + { + "epoch": 1.31, + "learning_rate": 1.2463683243069192e-05, + "loss": 0.542, + "step": 1066 + }, + { + "epoch": 1.32, + "learning_rate": 1.2450779551028651e-05, + "loss": 0.5681, + "step": 1067 + }, + { + "epoch": 1.32, + "learning_rate": 1.2437871516087174e-05, + "loss": 0.5889, + "step": 1068 + }, + { + "epoch": 1.32, + "learning_rate": 1.2424959161118425e-05, + "loss": 0.5713, + "step": 1069 + }, + { + "epoch": 1.32, + "learning_rate": 1.2412042509003728e-05, + "loss": 0.5874, + "step": 1070 + }, + { + "epoch": 1.32, + "learning_rate": 1.2399121582632018e-05, + "loss": 0.6203, + "step": 1071 + }, + { + "epoch": 1.32, + "learning_rate": 1.2386196404899808e-05, + "loss": 0.5156, + "step": 1072 + }, + { + "epoch": 1.32, + "learning_rate": 1.2373266998711152e-05, + "loss": 0.4884, + "step": 1073 + }, + { + "epoch": 1.32, + "learning_rate": 1.2360333386977574e-05, + "loss": 0.6521, + "step": 1074 + }, + { + "epoch": 1.33, + "learning_rate": 1.2347395592618075e-05, + "loss": 0.5497, + "step": 1075 + }, + { + "epoch": 1.33, + "learning_rate": 1.2334453638559057e-05, + "loss": 0.5648, + "step": 1076 + }, + { + "epoch": 1.33, + "learning_rate": 1.232150754773429e-05, + "loss": 0.5857, + "step": 1077 + }, + { + "epoch": 1.33, + "learning_rate": 1.2308557343084881e-05, + "loss": 0.5554, + "step": 1078 + }, + { + "epoch": 1.33, + "learning_rate": 1.2295603047559226e-05, + "loss": 0.5455, + "step": 1079 + }, + { + "epoch": 1.33, + "learning_rate": 1.2282644684112964e-05, + "loss": 0.5518, + "step": 1080 + }, + { + "epoch": 1.33, + "learning_rate": 1.2269682275708951e-05, + "loss": 0.5088, + "step": 1081 + }, + { + "epoch": 1.33, + "learning_rate": 1.225671584531721e-05, + "loss": 0.5572, + "step": 1082 + }, + { + "epoch": 1.34, + "learning_rate": 1.2243745415914882e-05, + "loss": 0.536, + "step": 1083 + }, + { + "epoch": 1.34, + "learning_rate": 1.2230771010486204e-05, + "loss": 0.5277, + "step": 1084 + }, + { + "epoch": 1.34, + "learning_rate": 1.2217792652022452e-05, + "loss": 0.5343, + "step": 1085 + }, + { + "epoch": 1.34, + "learning_rate": 1.2204810363521919e-05, + "loss": 0.6137, + "step": 1086 + }, + { + "epoch": 1.34, + "learning_rate": 1.2191824167989845e-05, + "loss": 0.5242, + "step": 1087 + }, + { + "epoch": 1.34, + "learning_rate": 1.2178834088438404e-05, + "loss": 0.595, + "step": 1088 + }, + { + "epoch": 1.34, + "learning_rate": 1.2165840147886656e-05, + "loss": 0.5137, + "step": 1089 + }, + { + "epoch": 1.34, + "learning_rate": 1.2152842369360489e-05, + "loss": 0.6098, + "step": 1090 + }, + { + "epoch": 1.34, + "learning_rate": 1.2139840775892606e-05, + "loss": 0.5694, + "step": 1091 + }, + { + "epoch": 1.35, + "learning_rate": 1.2126835390522466e-05, + "loss": 0.5768, + "step": 1092 + }, + { + "epoch": 1.35, + "learning_rate": 1.2113826236296245e-05, + "loss": 0.5193, + "step": 1093 + }, + { + "epoch": 1.35, + "learning_rate": 1.21008133362668e-05, + "loss": 0.4776, + "step": 1094 + }, + { + "epoch": 1.35, + "learning_rate": 1.2087796713493618e-05, + "loss": 0.5436, + "step": 1095 + }, + { + "epoch": 1.35, + "learning_rate": 1.2074776391042797e-05, + "loss": 0.567, + "step": 1096 + }, + { + "epoch": 1.35, + "learning_rate": 1.2061752391986982e-05, + "loss": 0.5557, + "step": 1097 + }, + { + "epoch": 1.35, + "learning_rate": 1.2048724739405337e-05, + "loss": 0.5333, + "step": 1098 + }, + { + "epoch": 1.35, + "learning_rate": 1.2035693456383493e-05, + "loss": 0.551, + "step": 1099 + }, + { + "epoch": 1.36, + "learning_rate": 1.202265856601352e-05, + "loss": 0.5993, + "step": 1100 + }, + { + "epoch": 1.36, + "learning_rate": 1.2009620091393885e-05, + "loss": 0.5892, + "step": 1101 + }, + { + "epoch": 1.36, + "learning_rate": 1.1996578055629395e-05, + "loss": 0.5147, + "step": 1102 + }, + { + "epoch": 1.36, + "learning_rate": 1.1983532481831179e-05, + "loss": 0.4809, + "step": 1103 + }, + { + "epoch": 1.36, + "learning_rate": 1.1970483393116626e-05, + "loss": 0.5957, + "step": 1104 + }, + { + "epoch": 1.36, + "learning_rate": 1.1957430812609361e-05, + "loss": 0.5448, + "step": 1105 + }, + { + "epoch": 1.36, + "learning_rate": 1.1944374763439189e-05, + "loss": 0.5733, + "step": 1106 + }, + { + "epoch": 1.36, + "learning_rate": 1.1931315268742075e-05, + "loss": 0.5776, + "step": 1107 + }, + { + "epoch": 1.37, + "learning_rate": 1.1918252351660066e-05, + "loss": 0.5024, + "step": 1108 + }, + { + "epoch": 1.37, + "learning_rate": 1.1905186035341304e-05, + "loss": 0.5114, + "step": 1109 + }, + { + "epoch": 1.37, + "learning_rate": 1.189211634293993e-05, + "loss": 0.5298, + "step": 1110 + }, + { + "epoch": 1.37, + "learning_rate": 1.187904329761608e-05, + "loss": 0.5016, + "step": 1111 + }, + { + "epoch": 1.37, + "learning_rate": 1.1865966922535826e-05, + "loss": 0.4817, + "step": 1112 + }, + { + "epoch": 1.37, + "learning_rate": 1.1852887240871145e-05, + "loss": 0.4268, + "step": 1113 + }, + { + "epoch": 1.37, + "learning_rate": 1.183980427579987e-05, + "loss": 0.5752, + "step": 1114 + }, + { + "epoch": 1.37, + "learning_rate": 1.1826718050505653e-05, + "loss": 0.4588, + "step": 1115 + }, + { + "epoch": 1.38, + "learning_rate": 1.1813628588177923e-05, + "loss": 0.4916, + "step": 1116 + }, + { + "epoch": 1.38, + "learning_rate": 1.1800535912011846e-05, + "loss": 0.655, + "step": 1117 + }, + { + "epoch": 1.38, + "learning_rate": 1.1787440045208287e-05, + "loss": 0.5271, + "step": 1118 + }, + { + "epoch": 1.38, + "learning_rate": 1.1774341010973753e-05, + "loss": 0.5527, + "step": 1119 + }, + { + "epoch": 1.38, + "learning_rate": 1.1761238832520384e-05, + "loss": 0.5261, + "step": 1120 + }, + { + "epoch": 1.38, + "learning_rate": 1.1748133533065864e-05, + "loss": 0.6096, + "step": 1121 + }, + { + "epoch": 1.38, + "learning_rate": 1.1735025135833436e-05, + "loss": 0.5587, + "step": 1122 + }, + { + "epoch": 1.38, + "learning_rate": 1.1721913664051814e-05, + "loss": 0.5811, + "step": 1123 + }, + { + "epoch": 1.39, + "learning_rate": 1.1708799140955165e-05, + "loss": 0.5232, + "step": 1124 + }, + { + "epoch": 1.39, + "learning_rate": 1.1695681589783065e-05, + "loss": 0.6073, + "step": 1125 + }, + { + "epoch": 1.39, + "learning_rate": 1.1682561033780457e-05, + "loss": 0.4646, + "step": 1126 + }, + { + "epoch": 1.39, + "learning_rate": 1.16694374961976e-05, + "loss": 0.6386, + "step": 1127 + }, + { + "epoch": 1.39, + "learning_rate": 1.165631100029005e-05, + "loss": 0.4819, + "step": 1128 + }, + { + "epoch": 1.39, + "learning_rate": 1.1643181569318596e-05, + "loss": 0.5645, + "step": 1129 + }, + { + "epoch": 1.39, + "learning_rate": 1.1630049226549227e-05, + "loss": 0.5028, + "step": 1130 + }, + { + "epoch": 1.39, + "learning_rate": 1.16169139952531e-05, + "loss": 0.6587, + "step": 1131 + }, + { + "epoch": 1.4, + "learning_rate": 1.1603775898706479e-05, + "loss": 0.5249, + "step": 1132 + }, + { + "epoch": 1.4, + "learning_rate": 1.1590634960190722e-05, + "loss": 0.4885, + "step": 1133 + }, + { + "epoch": 1.4, + "learning_rate": 1.1577491202992204e-05, + "loss": 0.5353, + "step": 1134 + }, + { + "epoch": 1.4, + "learning_rate": 1.156434465040231e-05, + "loss": 0.5796, + "step": 1135 + }, + { + "epoch": 1.4, + "learning_rate": 1.155119532571737e-05, + "loss": 0.5553, + "step": 1136 + }, + { + "epoch": 1.4, + "learning_rate": 1.1538043252238629e-05, + "loss": 0.5264, + "step": 1137 + }, + { + "epoch": 1.4, + "learning_rate": 1.15248884532722e-05, + "loss": 0.5136, + "step": 1138 + }, + { + "epoch": 1.4, + "learning_rate": 1.1511730952129037e-05, + "loss": 0.5225, + "step": 1139 + }, + { + "epoch": 1.41, + "learning_rate": 1.1498570772124863e-05, + "loss": 0.5899, + "step": 1140 + }, + { + "epoch": 1.41, + "learning_rate": 1.1485407936580169e-05, + "loss": 0.5824, + "step": 1141 + }, + { + "epoch": 1.41, + "learning_rate": 1.1472242468820136e-05, + "loss": 0.5478, + "step": 1142 + }, + { + "epoch": 1.41, + "learning_rate": 1.1459074392174619e-05, + "loss": 0.546, + "step": 1143 + }, + { + "epoch": 1.41, + "learning_rate": 1.1445903729978088e-05, + "loss": 0.5521, + "step": 1144 + }, + { + "epoch": 1.41, + "learning_rate": 1.1432730505569597e-05, + "loss": 0.3808, + "step": 1145 + }, + { + "epoch": 1.41, + "learning_rate": 1.1419554742292753e-05, + "loss": 0.5555, + "step": 1146 + }, + { + "epoch": 1.41, + "learning_rate": 1.1406376463495643e-05, + "loss": 0.5659, + "step": 1147 + }, + { + "epoch": 1.42, + "learning_rate": 1.1393195692530823e-05, + "loss": 0.5034, + "step": 1148 + }, + { + "epoch": 1.42, + "learning_rate": 1.1380012452755259e-05, + "loss": 0.534, + "step": 1149 + }, + { + "epoch": 1.42, + "learning_rate": 1.1366826767530296e-05, + "loss": 0.592, + "step": 1150 + }, + { + "epoch": 1.42, + "learning_rate": 1.1353638660221616e-05, + "loss": 0.5388, + "step": 1151 + }, + { + "epoch": 1.42, + "learning_rate": 1.1340448154199186e-05, + "loss": 0.5289, + "step": 1152 + }, + { + "epoch": 1.42, + "learning_rate": 1.1327255272837221e-05, + "loss": 0.5205, + "step": 1153 + }, + { + "epoch": 1.42, + "learning_rate": 1.131406003951416e-05, + "loss": 0.5794, + "step": 1154 + }, + { + "epoch": 1.42, + "learning_rate": 1.130086247761259e-05, + "loss": 0.5576, + "step": 1155 + }, + { + "epoch": 1.43, + "learning_rate": 1.1287662610519244e-05, + "loss": 0.549, + "step": 1156 + }, + { + "epoch": 1.43, + "learning_rate": 1.1274460461624925e-05, + "loss": 0.4726, + "step": 1157 + }, + { + "epoch": 1.43, + "learning_rate": 1.1261256054324488e-05, + "loss": 0.5337, + "step": 1158 + }, + { + "epoch": 1.43, + "learning_rate": 1.1248049412016782e-05, + "loss": 0.5183, + "step": 1159 + }, + { + "epoch": 1.43, + "learning_rate": 1.1234840558104628e-05, + "loss": 0.615, + "step": 1160 + }, + { + "epoch": 1.43, + "learning_rate": 1.1221629515994754e-05, + "loss": 0.5413, + "step": 1161 + }, + { + "epoch": 1.43, + "learning_rate": 1.1208416309097775e-05, + "loss": 0.5226, + "step": 1162 + }, + { + "epoch": 1.43, + "learning_rate": 1.1195200960828138e-05, + "loss": 0.6069, + "step": 1163 + }, + { + "epoch": 1.43, + "learning_rate": 1.1181983494604082e-05, + "loss": 0.578, + "step": 1164 + }, + { + "epoch": 1.44, + "learning_rate": 1.1168763933847608e-05, + "loss": 0.5077, + "step": 1165 + }, + { + "epoch": 1.44, + "learning_rate": 1.1155542301984415e-05, + "loss": 0.5418, + "step": 1166 + }, + { + "epoch": 1.44, + "learning_rate": 1.1142318622443883e-05, + "loss": 0.4442, + "step": 1167 + }, + { + "epoch": 1.44, + "learning_rate": 1.1129092918659019e-05, + "loss": 0.5971, + "step": 1168 + }, + { + "epoch": 1.44, + "learning_rate": 1.1115865214066414e-05, + "loss": 0.5145, + "step": 1169 + }, + { + "epoch": 1.44, + "learning_rate": 1.1102635532106204e-05, + "loss": 0.5157, + "step": 1170 + }, + { + "epoch": 1.44, + "learning_rate": 1.108940389622204e-05, + "loss": 0.5566, + "step": 1171 + }, + { + "epoch": 1.44, + "learning_rate": 1.1076170329861012e-05, + "loss": 0.5325, + "step": 1172 + }, + { + "epoch": 1.45, + "learning_rate": 1.1062934856473655e-05, + "loss": 0.5184, + "step": 1173 + }, + { + "epoch": 1.45, + "learning_rate": 1.1049697499513871e-05, + "loss": 0.5819, + "step": 1174 + }, + { + "epoch": 1.45, + "learning_rate": 1.1036458282438905e-05, + "loss": 0.5604, + "step": 1175 + }, + { + "epoch": 1.45, + "learning_rate": 1.102321722870929e-05, + "loss": 0.5077, + "step": 1176 + }, + { + "epoch": 1.45, + "learning_rate": 1.1009974361788822e-05, + "loss": 0.5139, + "step": 1177 + }, + { + "epoch": 1.45, + "learning_rate": 1.099672970514451e-05, + "loss": 0.5208, + "step": 1178 + }, + { + "epoch": 1.45, + "learning_rate": 1.0983483282246527e-05, + "loss": 0.5535, + "step": 1179 + }, + { + "epoch": 1.45, + "learning_rate": 1.0970235116568186e-05, + "loss": 0.5893, + "step": 1180 + }, + { + "epoch": 1.46, + "learning_rate": 1.095698523158588e-05, + "loss": 0.5307, + "step": 1181 + }, + { + "epoch": 1.46, + "learning_rate": 1.094373365077905e-05, + "loss": 0.4983, + "step": 1182 + }, + { + "epoch": 1.46, + "learning_rate": 1.0930480397630146e-05, + "loss": 0.445, + "step": 1183 + }, + { + "epoch": 1.46, + "learning_rate": 1.0917225495624581e-05, + "loss": 0.4914, + "step": 1184 + }, + { + "epoch": 1.46, + "learning_rate": 1.0903968968250682e-05, + "loss": 0.4853, + "step": 1185 + }, + { + "epoch": 1.46, + "learning_rate": 1.0890710838999671e-05, + "loss": 0.5507, + "step": 1186 + }, + { + "epoch": 1.46, + "learning_rate": 1.087745113136559e-05, + "loss": 0.5086, + "step": 1187 + }, + { + "epoch": 1.46, + "learning_rate": 1.0864189868845296e-05, + "loss": 0.7193, + "step": 1188 + }, + { + "epoch": 1.47, + "learning_rate": 1.085092707493839e-05, + "loss": 0.6319, + "step": 1189 + }, + { + "epoch": 1.47, + "learning_rate": 1.0837662773147189e-05, + "loss": 0.5104, + "step": 1190 + }, + { + "epoch": 1.47, + "learning_rate": 1.0824396986976681e-05, + "loss": 0.5457, + "step": 1191 + }, + { + "epoch": 1.47, + "learning_rate": 1.0811129739934494e-05, + "loss": 0.5824, + "step": 1192 + }, + { + "epoch": 1.47, + "learning_rate": 1.0797861055530832e-05, + "loss": 0.5203, + "step": 1193 + }, + { + "epoch": 1.47, + "learning_rate": 1.0784590957278452e-05, + "loss": 0.6037, + "step": 1194 + }, + { + "epoch": 1.47, + "learning_rate": 1.0771319468692613e-05, + "loss": 0.4794, + "step": 1195 + }, + { + "epoch": 1.47, + "learning_rate": 1.0758046613291043e-05, + "loss": 0.6196, + "step": 1196 + }, + { + "epoch": 1.48, + "learning_rate": 1.0744772414593889e-05, + "loss": 0.5942, + "step": 1197 + }, + { + "epoch": 1.48, + "learning_rate": 1.0731496896123676e-05, + "loss": 0.4483, + "step": 1198 + }, + { + "epoch": 1.48, + "learning_rate": 1.0718220081405277e-05, + "loss": 0.6162, + "step": 1199 + }, + { + "epoch": 1.48, + "learning_rate": 1.0704941993965849e-05, + "loss": 0.4529, + "step": 1200 + }, + { + "epoch": 1.48, + "learning_rate": 1.0691662657334815e-05, + "loss": 0.6396, + "step": 1201 + }, + { + "epoch": 1.48, + "learning_rate": 1.0678382095043807e-05, + "loss": 0.4539, + "step": 1202 + }, + { + "epoch": 1.48, + "learning_rate": 1.0665100330626625e-05, + "loss": 0.6123, + "step": 1203 + }, + { + "epoch": 1.48, + "learning_rate": 1.0651817387619206e-05, + "loss": 0.53, + "step": 1204 + }, + { + "epoch": 1.49, + "learning_rate": 1.0638533289559574e-05, + "loss": 0.5906, + "step": 1205 + }, + { + "epoch": 1.49, + "learning_rate": 1.06252480599878e-05, + "loss": 0.5797, + "step": 1206 + }, + { + "epoch": 1.49, + "learning_rate": 1.0611961722445955e-05, + "loss": 0.6582, + "step": 1207 + }, + { + "epoch": 1.49, + "learning_rate": 1.0598674300478085e-05, + "loss": 0.488, + "step": 1208 + }, + { + "epoch": 1.49, + "learning_rate": 1.0585385817630137e-05, + "loss": 0.5822, + "step": 1209 + }, + { + "epoch": 1.49, + "learning_rate": 1.0572096297449967e-05, + "loss": 0.4932, + "step": 1210 + }, + { + "epoch": 1.49, + "learning_rate": 1.0558805763487242e-05, + "loss": 0.5382, + "step": 1211 + }, + { + "epoch": 1.49, + "learning_rate": 1.0545514239293437e-05, + "loss": 0.5278, + "step": 1212 + }, + { + "epoch": 1.5, + "learning_rate": 1.0532221748421786e-05, + "loss": 0.6017, + "step": 1213 + }, + { + "epoch": 1.5, + "learning_rate": 1.0518928314427233e-05, + "loss": 0.5503, + "step": 1214 + }, + { + "epoch": 1.5, + "learning_rate": 1.0505633960866384e-05, + "loss": 0.5056, + "step": 1215 + }, + { + "epoch": 1.5, + "learning_rate": 1.0492338711297488e-05, + "loss": 0.5756, + "step": 1216 + }, + { + "epoch": 1.5, + "learning_rate": 1.047904258928037e-05, + "loss": 0.5415, + "step": 1217 + }, + { + "epoch": 1.5, + "learning_rate": 1.0465745618376417e-05, + "loss": 0.5843, + "step": 1218 + }, + { + "epoch": 1.5, + "learning_rate": 1.0452447822148499e-05, + "loss": 0.5269, + "step": 1219 + }, + { + "epoch": 1.5, + "learning_rate": 1.043914922416097e-05, + "loss": 0.6205, + "step": 1220 + }, + { + "epoch": 1.51, + "learning_rate": 1.0425849847979586e-05, + "loss": 0.6223, + "step": 1221 + }, + { + "epoch": 1.51, + "learning_rate": 1.0412549717171497e-05, + "loss": 0.5524, + "step": 1222 + }, + { + "epoch": 1.51, + "learning_rate": 1.0399248855305178e-05, + "loss": 0.5667, + "step": 1223 + }, + { + "epoch": 1.51, + "learning_rate": 1.0385947285950407e-05, + "loss": 0.5062, + "step": 1224 + }, + { + "epoch": 1.51, + "learning_rate": 1.0372645032678215e-05, + "loss": 0.5965, + "step": 1225 + }, + { + "epoch": 1.51, + "learning_rate": 1.0359342119060844e-05, + "loss": 0.4743, + "step": 1226 + }, + { + "epoch": 1.51, + "learning_rate": 1.0346038568671708e-05, + "loss": 0.6031, + "step": 1227 + }, + { + "epoch": 1.51, + "learning_rate": 1.0332734405085343e-05, + "loss": 0.5637, + "step": 1228 + }, + { + "epoch": 1.52, + "learning_rate": 1.031942965187738e-05, + "loss": 0.5094, + "step": 1229 + }, + { + "epoch": 1.52, + "learning_rate": 1.0306124332624484e-05, + "loss": 0.667, + "step": 1230 + }, + { + "epoch": 1.52, + "learning_rate": 1.029281847090434e-05, + "loss": 0.5506, + "step": 1231 + }, + { + "epoch": 1.52, + "learning_rate": 1.0279512090295574e-05, + "loss": 0.4839, + "step": 1232 + }, + { + "epoch": 1.52, + "learning_rate": 1.026620521437775e-05, + "loss": 0.5498, + "step": 1233 + }, + { + "epoch": 1.52, + "learning_rate": 1.0252897866731295e-05, + "loss": 0.6048, + "step": 1234 + }, + { + "epoch": 1.52, + "learning_rate": 1.0239590070937483e-05, + "loss": 0.4795, + "step": 1235 + }, + { + "epoch": 1.52, + "learning_rate": 1.0226281850578377e-05, + "loss": 0.5554, + "step": 1236 + }, + { + "epoch": 1.52, + "learning_rate": 1.0212973229236787e-05, + "loss": 0.5215, + "step": 1237 + }, + { + "epoch": 1.53, + "learning_rate": 1.0199664230496247e-05, + "loss": 0.5171, + "step": 1238 + }, + { + "epoch": 1.53, + "learning_rate": 1.0186354877940948e-05, + "loss": 0.5311, + "step": 1239 + }, + { + "epoch": 1.53, + "learning_rate": 1.0173045195155712e-05, + "loss": 0.495, + "step": 1240 + }, + { + "epoch": 1.53, + "learning_rate": 1.0159735205725949e-05, + "loss": 0.5537, + "step": 1241 + }, + { + "epoch": 1.53, + "learning_rate": 1.0146424933237608e-05, + "loss": 0.5556, + "step": 1242 + }, + { + "epoch": 1.53, + "learning_rate": 1.013311440127714e-05, + "loss": 0.5388, + "step": 1243 + }, + { + "epoch": 1.53, + "learning_rate": 1.0119803633431459e-05, + "loss": 0.5613, + "step": 1244 + }, + { + "epoch": 1.53, + "learning_rate": 1.0106492653287893e-05, + "loss": 0.533, + "step": 1245 + }, + { + "epoch": 1.54, + "learning_rate": 1.0093181484434151e-05, + "loss": 0.5268, + "step": 1246 + }, + { + "epoch": 1.54, + "learning_rate": 1.0079870150458274e-05, + "loss": 0.6224, + "step": 1247 + }, + { + "epoch": 1.54, + "learning_rate": 1.006655867494859e-05, + "loss": 0.5594, + "step": 1248 + }, + { + "epoch": 1.54, + "learning_rate": 1.0053247081493684e-05, + "loss": 0.4897, + "step": 1249 + }, + { + "epoch": 1.54, + "learning_rate": 1.0039935393682358e-05, + "loss": 0.4742, + "step": 1250 + }, + { + "epoch": 1.54, + "learning_rate": 1.0026623635103563e-05, + "loss": 0.5325, + "step": 1251 + }, + { + "epoch": 1.54, + "learning_rate": 1.0013311829346389e-05, + "loss": 0.6013, + "step": 1252 + }, + { + "epoch": 1.54, + "learning_rate": 1e-05, + "loss": 0.5623, + "step": 1253 + }, + { + "epoch": 1.55, + "learning_rate": 9.986688170653616e-06, + "loss": 0.4787, + "step": 1254 + }, + { + "epoch": 1.55, + "learning_rate": 9.973376364896438e-06, + "loss": 0.63, + "step": 1255 + }, + { + "epoch": 1.55, + "learning_rate": 9.960064606317647e-06, + "loss": 0.4757, + "step": 1256 + }, + { + "epoch": 1.55, + "learning_rate": 9.946752918506319e-06, + "loss": 0.5611, + "step": 1257 + }, + { + "epoch": 1.55, + "learning_rate": 9.933441325051414e-06, + "loss": 0.5185, + "step": 1258 + }, + { + "epoch": 1.55, + "learning_rate": 9.920129849541731e-06, + "loss": 0.508, + "step": 1259 + }, + { + "epoch": 1.55, + "learning_rate": 9.906818515565849e-06, + "loss": 0.5856, + "step": 1260 + }, + { + "epoch": 1.55, + "learning_rate": 9.893507346712112e-06, + "loss": 0.5983, + "step": 1261 + }, + { + "epoch": 1.56, + "learning_rate": 9.880196366568546e-06, + "loss": 0.4987, + "step": 1262 + }, + { + "epoch": 1.56, + "learning_rate": 9.866885598722865e-06, + "loss": 0.6237, + "step": 1263 + }, + { + "epoch": 1.56, + "learning_rate": 9.853575066762395e-06, + "loss": 0.5622, + "step": 1264 + }, + { + "epoch": 1.56, + "learning_rate": 9.840264794274053e-06, + "loss": 0.543, + "step": 1265 + }, + { + "epoch": 1.56, + "learning_rate": 9.826954804844288e-06, + "loss": 0.5344, + "step": 1266 + }, + { + "epoch": 1.56, + "learning_rate": 9.813645122059054e-06, + "loss": 0.5382, + "step": 1267 + }, + { + "epoch": 1.56, + "learning_rate": 9.800335769503756e-06, + "loss": 0.568, + "step": 1268 + }, + { + "epoch": 1.56, + "learning_rate": 9.787026770763216e-06, + "loss": 0.4701, + "step": 1269 + }, + { + "epoch": 1.57, + "learning_rate": 9.773718149421627e-06, + "loss": 0.4735, + "step": 1270 + }, + { + "epoch": 1.57, + "learning_rate": 9.760409929062518e-06, + "loss": 0.5715, + "step": 1271 + }, + { + "epoch": 1.57, + "learning_rate": 9.747102133268709e-06, + "loss": 0.5006, + "step": 1272 + }, + { + "epoch": 1.57, + "learning_rate": 9.733794785622254e-06, + "loss": 0.5082, + "step": 1273 + }, + { + "epoch": 1.57, + "learning_rate": 9.72048790970443e-06, + "loss": 0.5209, + "step": 1274 + }, + { + "epoch": 1.57, + "learning_rate": 9.707181529095663e-06, + "loss": 0.7047, + "step": 1275 + }, + { + "epoch": 1.57, + "learning_rate": 9.693875667375518e-06, + "loss": 0.5197, + "step": 1276 + }, + { + "epoch": 1.57, + "learning_rate": 9.680570348122626e-06, + "loss": 0.6685, + "step": 1277 + }, + { + "epoch": 1.58, + "learning_rate": 9.667265594914662e-06, + "loss": 0.6546, + "step": 1278 + }, + { + "epoch": 1.58, + "learning_rate": 9.653961431328295e-06, + "loss": 0.5609, + "step": 1279 + }, + { + "epoch": 1.58, + "learning_rate": 9.640657880939157e-06, + "loss": 0.6543, + "step": 1280 + }, + { + "epoch": 1.58, + "learning_rate": 9.627354967321785e-06, + "loss": 0.5296, + "step": 1281 + }, + { + "epoch": 1.58, + "learning_rate": 9.614052714049597e-06, + "loss": 0.54, + "step": 1282 + }, + { + "epoch": 1.58, + "learning_rate": 9.600751144694827e-06, + "loss": 0.5812, + "step": 1283 + }, + { + "epoch": 1.58, + "learning_rate": 9.587450282828508e-06, + "loss": 0.547, + "step": 1284 + }, + { + "epoch": 1.58, + "learning_rate": 9.574150152020415e-06, + "loss": 0.5773, + "step": 1285 + }, + { + "epoch": 1.59, + "learning_rate": 9.560850775839034e-06, + "loss": 0.5702, + "step": 1286 + }, + { + "epoch": 1.59, + "learning_rate": 9.5475521778515e-06, + "loss": 0.5487, + "step": 1287 + }, + { + "epoch": 1.59, + "learning_rate": 9.534254381623588e-06, + "loss": 0.5106, + "step": 1288 + }, + { + "epoch": 1.59, + "learning_rate": 9.520957410719632e-06, + "loss": 0.4711, + "step": 1289 + }, + { + "epoch": 1.59, + "learning_rate": 9.507661288702515e-06, + "loss": 0.538, + "step": 1290 + }, + { + "epoch": 1.59, + "learning_rate": 9.494366039133619e-06, + "loss": 0.6421, + "step": 1291 + }, + { + "epoch": 1.59, + "learning_rate": 9.481071685572769e-06, + "loss": 0.5767, + "step": 1292 + }, + { + "epoch": 1.59, + "learning_rate": 9.467778251578217e-06, + "loss": 0.5717, + "step": 1293 + }, + { + "epoch": 1.6, + "learning_rate": 9.454485760706564e-06, + "loss": 0.5049, + "step": 1294 + }, + { + "epoch": 1.6, + "learning_rate": 9.441194236512763e-06, + "loss": 0.6325, + "step": 1295 + }, + { + "epoch": 1.6, + "learning_rate": 9.427903702550034e-06, + "loss": 0.5534, + "step": 1296 + }, + { + "epoch": 1.6, + "learning_rate": 9.414614182369862e-06, + "loss": 0.6109, + "step": 1297 + }, + { + "epoch": 1.6, + "learning_rate": 9.401325699521922e-06, + "loss": 0.5426, + "step": 1298 + }, + { + "epoch": 1.6, + "learning_rate": 9.388038277554046e-06, + "loss": 0.5317, + "step": 1299 + }, + { + "epoch": 1.6, + "learning_rate": 9.374751940012203e-06, + "loss": 0.6109, + "step": 1300 + }, + { + "epoch": 1.6, + "learning_rate": 9.361466710440428e-06, + "loss": 0.5378, + "step": 1301 + }, + { + "epoch": 1.6, + "learning_rate": 9.348182612380796e-06, + "loss": 0.5346, + "step": 1302 + }, + { + "epoch": 1.61, + "learning_rate": 9.334899669373379e-06, + "loss": 0.4505, + "step": 1303 + }, + { + "epoch": 1.61, + "learning_rate": 9.3216179049562e-06, + "loss": 0.6156, + "step": 1304 + }, + { + "epoch": 1.61, + "learning_rate": 9.308337342665188e-06, + "loss": 0.5438, + "step": 1305 + }, + { + "epoch": 1.61, + "learning_rate": 9.295058006034153e-06, + "loss": 0.5866, + "step": 1306 + }, + { + "epoch": 1.61, + "learning_rate": 9.281779918594723e-06, + "loss": 0.554, + "step": 1307 + }, + { + "epoch": 1.61, + "learning_rate": 9.268503103876324e-06, + "loss": 0.5989, + "step": 1308 + }, + { + "epoch": 1.61, + "learning_rate": 9.255227585406116e-06, + "loss": 0.6214, + "step": 1309 + }, + { + "epoch": 1.61, + "learning_rate": 9.241953386708962e-06, + "loss": 0.5744, + "step": 1310 + }, + { + "epoch": 1.62, + "learning_rate": 9.22868053130739e-06, + "loss": 0.5998, + "step": 1311 + }, + { + "epoch": 1.62, + "learning_rate": 9.215409042721553e-06, + "loss": 0.4885, + "step": 1312 + }, + { + "epoch": 1.62, + "learning_rate": 9.202138944469168e-06, + "loss": 0.5322, + "step": 1313 + }, + { + "epoch": 1.62, + "learning_rate": 9.188870260065507e-06, + "loss": 0.625, + "step": 1314 + }, + { + "epoch": 1.62, + "learning_rate": 9.17560301302332e-06, + "loss": 0.5804, + "step": 1315 + }, + { + "epoch": 1.62, + "learning_rate": 9.162337226852813e-06, + "loss": 0.508, + "step": 1316 + }, + { + "epoch": 1.62, + "learning_rate": 9.149072925061614e-06, + "loss": 0.4975, + "step": 1317 + }, + { + "epoch": 1.62, + "learning_rate": 9.135810131154707e-06, + "loss": 0.5131, + "step": 1318 + }, + { + "epoch": 1.63, + "learning_rate": 9.122548868634416e-06, + "loss": 0.5623, + "step": 1319 + }, + { + "epoch": 1.63, + "learning_rate": 9.109289161000334e-06, + "loss": 0.5186, + "step": 1320 + }, + { + "epoch": 1.63, + "learning_rate": 9.096031031749321e-06, + "loss": 0.5396, + "step": 1321 + }, + { + "epoch": 1.63, + "learning_rate": 9.082774504375422e-06, + "loss": 0.6223, + "step": 1322 + }, + { + "epoch": 1.63, + "learning_rate": 9.069519602369856e-06, + "loss": 0.5696, + "step": 1323 + }, + { + "epoch": 1.63, + "learning_rate": 9.056266349220951e-06, + "loss": 0.6405, + "step": 1324 + }, + { + "epoch": 1.63, + "learning_rate": 9.043014768414125e-06, + "loss": 0.4751, + "step": 1325 + }, + { + "epoch": 1.63, + "learning_rate": 9.029764883431818e-06, + "loss": 0.5887, + "step": 1326 + }, + { + "epoch": 1.64, + "learning_rate": 9.016516717753474e-06, + "loss": 0.5308, + "step": 1327 + }, + { + "epoch": 1.64, + "learning_rate": 9.003270294855491e-06, + "loss": 0.6018, + "step": 1328 + }, + { + "epoch": 1.64, + "learning_rate": 8.99002563821118e-06, + "loss": 0.4905, + "step": 1329 + }, + { + "epoch": 1.64, + "learning_rate": 8.976782771290715e-06, + "loss": 0.5102, + "step": 1330 + }, + { + "epoch": 1.64, + "learning_rate": 8.9635417175611e-06, + "loss": 0.5821, + "step": 1331 + }, + { + "epoch": 1.64, + "learning_rate": 8.95030250048613e-06, + "loss": 0.5619, + "step": 1332 + }, + { + "epoch": 1.64, + "learning_rate": 8.937065143526349e-06, + "loss": 0.5037, + "step": 1333 + }, + { + "epoch": 1.64, + "learning_rate": 8.92382967013899e-06, + "loss": 0.4878, + "step": 1334 + }, + { + "epoch": 1.65, + "learning_rate": 8.910596103777965e-06, + "loss": 0.5217, + "step": 1335 + }, + { + "epoch": 1.65, + "learning_rate": 8.897364467893797e-06, + "loss": 0.479, + "step": 1336 + }, + { + "epoch": 1.65, + "learning_rate": 8.884134785933588e-06, + "loss": 0.4989, + "step": 1337 + }, + { + "epoch": 1.65, + "learning_rate": 8.870907081340983e-06, + "loss": 0.5119, + "step": 1338 + }, + { + "epoch": 1.65, + "learning_rate": 8.857681377556117e-06, + "loss": 0.6321, + "step": 1339 + }, + { + "epoch": 1.65, + "learning_rate": 8.844457698015588e-06, + "loss": 0.4308, + "step": 1340 + }, + { + "epoch": 1.65, + "learning_rate": 8.831236066152397e-06, + "loss": 0.5542, + "step": 1341 + }, + { + "epoch": 1.65, + "learning_rate": 8.818016505395921e-06, + "loss": 0.585, + "step": 1342 + }, + { + "epoch": 1.66, + "learning_rate": 8.804799039171863e-06, + "loss": 0.5585, + "step": 1343 + }, + { + "epoch": 1.66, + "learning_rate": 8.791583690902226e-06, + "loss": 0.5806, + "step": 1344 + }, + { + "epoch": 1.66, + "learning_rate": 8.778370484005245e-06, + "loss": 0.4797, + "step": 1345 + }, + { + "epoch": 1.66, + "learning_rate": 8.765159441895376e-06, + "loss": 0.5148, + "step": 1346 + }, + { + "epoch": 1.66, + "learning_rate": 8.751950587983221e-06, + "loss": 0.5539, + "step": 1347 + }, + { + "epoch": 1.66, + "learning_rate": 8.738743945675515e-06, + "loss": 0.5215, + "step": 1348 + }, + { + "epoch": 1.66, + "learning_rate": 8.725539538375078e-06, + "loss": 0.5428, + "step": 1349 + }, + { + "epoch": 1.66, + "learning_rate": 8.712337389480758e-06, + "loss": 0.4676, + "step": 1350 + }, + { + "epoch": 1.67, + "learning_rate": 8.699137522387415e-06, + "loss": 0.4829, + "step": 1351 + }, + { + "epoch": 1.67, + "learning_rate": 8.685939960485846e-06, + "loss": 0.5762, + "step": 1352 + }, + { + "epoch": 1.67, + "learning_rate": 8.672744727162782e-06, + "loss": 0.5142, + "step": 1353 + }, + { + "epoch": 1.67, + "learning_rate": 8.659551845800818e-06, + "loss": 0.5234, + "step": 1354 + }, + { + "epoch": 1.67, + "learning_rate": 8.646361339778386e-06, + "loss": 0.5152, + "step": 1355 + }, + { + "epoch": 1.67, + "learning_rate": 8.633173232469707e-06, + "loss": 0.5449, + "step": 1356 + }, + { + "epoch": 1.67, + "learning_rate": 8.619987547244746e-06, + "loss": 0.5545, + "step": 1357 + }, + { + "epoch": 1.67, + "learning_rate": 8.606804307469182e-06, + "loss": 0.5465, + "step": 1358 + }, + { + "epoch": 1.68, + "learning_rate": 8.59362353650436e-06, + "loss": 0.6104, + "step": 1359 + }, + { + "epoch": 1.68, + "learning_rate": 8.580445257707246e-06, + "loss": 0.4816, + "step": 1360 + }, + { + "epoch": 1.68, + "learning_rate": 8.567269494430404e-06, + "loss": 0.6053, + "step": 1361 + }, + { + "epoch": 1.68, + "learning_rate": 8.554096270021917e-06, + "loss": 0.6355, + "step": 1362 + }, + { + "epoch": 1.68, + "learning_rate": 8.540925607825385e-06, + "loss": 0.4961, + "step": 1363 + }, + { + "epoch": 1.68, + "learning_rate": 8.527757531179866e-06, + "loss": 0.5935, + "step": 1364 + }, + { + "epoch": 1.68, + "learning_rate": 8.514592063419833e-06, + "loss": 0.5945, + "step": 1365 + }, + { + "epoch": 1.68, + "learning_rate": 8.501429227875137e-06, + "loss": 0.5842, + "step": 1366 + }, + { + "epoch": 1.69, + "learning_rate": 8.488269047870968e-06, + "loss": 0.5052, + "step": 1367 + }, + { + "epoch": 1.69, + "learning_rate": 8.475111546727802e-06, + "loss": 0.5677, + "step": 1368 + }, + { + "epoch": 1.69, + "learning_rate": 8.461956747761375e-06, + "loss": 0.5367, + "step": 1369 + }, + { + "epoch": 1.69, + "learning_rate": 8.448804674282633e-06, + "loss": 0.6271, + "step": 1370 + }, + { + "epoch": 1.69, + "learning_rate": 8.43565534959769e-06, + "loss": 0.6137, + "step": 1371 + }, + { + "epoch": 1.69, + "learning_rate": 8.422508797007798e-06, + "loss": 0.6369, + "step": 1372 + }, + { + "epoch": 1.69, + "learning_rate": 8.409365039809282e-06, + "loss": 0.5954, + "step": 1373 + }, + { + "epoch": 1.69, + "learning_rate": 8.396224101293523e-06, + "loss": 0.5645, + "step": 1374 + }, + { + "epoch": 1.69, + "learning_rate": 8.383086004746903e-06, + "loss": 0.5453, + "step": 1375 + }, + { + "epoch": 1.7, + "learning_rate": 8.369950773450774e-06, + "loss": 0.5044, + "step": 1376 + }, + { + "epoch": 1.7, + "learning_rate": 8.356818430681409e-06, + "loss": 0.6318, + "step": 1377 + }, + { + "epoch": 1.7, + "learning_rate": 8.343688999709953e-06, + "loss": 0.533, + "step": 1378 + }, + { + "epoch": 1.7, + "learning_rate": 8.330562503802402e-06, + "loss": 0.6221, + "step": 1379 + }, + { + "epoch": 1.7, + "learning_rate": 8.317438966219546e-06, + "loss": 0.4671, + "step": 1380 + }, + { + "epoch": 1.7, + "learning_rate": 8.304318410216937e-06, + "loss": 0.5248, + "step": 1381 + }, + { + "epoch": 1.7, + "learning_rate": 8.291200859044836e-06, + "loss": 0.5349, + "step": 1382 + }, + { + "epoch": 1.7, + "learning_rate": 8.278086335948191e-06, + "loss": 0.5211, + "step": 1383 + }, + { + "epoch": 1.71, + "learning_rate": 8.264974864166566e-06, + "loss": 0.5882, + "step": 1384 + }, + { + "epoch": 1.71, + "learning_rate": 8.251866466934137e-06, + "loss": 0.6037, + "step": 1385 + }, + { + "epoch": 1.71, + "learning_rate": 8.23876116747962e-06, + "loss": 0.6253, + "step": 1386 + }, + { + "epoch": 1.71, + "learning_rate": 8.225658989026245e-06, + "loss": 0.6069, + "step": 1387 + }, + { + "epoch": 1.71, + "learning_rate": 8.212559954791718e-06, + "loss": 0.5798, + "step": 1388 + }, + { + "epoch": 1.71, + "learning_rate": 8.199464087988158e-06, + "loss": 0.5389, + "step": 1389 + }, + { + "epoch": 1.71, + "learning_rate": 8.18637141182208e-06, + "loss": 0.5249, + "step": 1390 + }, + { + "epoch": 1.71, + "learning_rate": 8.173281949494352e-06, + "loss": 0.5362, + "step": 1391 + }, + { + "epoch": 1.72, + "learning_rate": 8.160195724200132e-06, + "loss": 0.5533, + "step": 1392 + }, + { + "epoch": 1.72, + "learning_rate": 8.147112759128859e-06, + "loss": 0.5616, + "step": 1393 + }, + { + "epoch": 1.72, + "learning_rate": 8.134033077464177e-06, + "loss": 0.4979, + "step": 1394 + }, + { + "epoch": 1.72, + "learning_rate": 8.120956702383922e-06, + "loss": 0.4791, + "step": 1395 + }, + { + "epoch": 1.72, + "learning_rate": 8.107883657060072e-06, + "loss": 0.4978, + "step": 1396 + }, + { + "epoch": 1.72, + "learning_rate": 8.094813964658698e-06, + "loss": 0.5612, + "step": 1397 + }, + { + "epoch": 1.72, + "learning_rate": 8.081747648339939e-06, + "loss": 0.7394, + "step": 1398 + }, + { + "epoch": 1.72, + "learning_rate": 8.068684731257932e-06, + "loss": 0.5848, + "step": 1399 + }, + { + "epoch": 1.73, + "learning_rate": 8.055625236560813e-06, + "loss": 0.5882, + "step": 1400 + }, + { + "epoch": 1.73, + "learning_rate": 8.042569187390642e-06, + "loss": 0.5685, + "step": 1401 + }, + { + "epoch": 1.73, + "learning_rate": 8.029516606883376e-06, + "loss": 0.5368, + "step": 1402 + }, + { + "epoch": 1.73, + "learning_rate": 8.01646751816882e-06, + "loss": 0.5684, + "step": 1403 + }, + { + "epoch": 1.73, + "learning_rate": 8.003421944370607e-06, + "loss": 0.4981, + "step": 1404 + }, + { + "epoch": 1.73, + "learning_rate": 7.990379908606118e-06, + "loss": 0.5292, + "step": 1405 + }, + { + "epoch": 1.73, + "learning_rate": 7.977341433986481e-06, + "loss": 0.7444, + "step": 1406 + }, + { + "epoch": 1.73, + "learning_rate": 7.964306543616509e-06, + "loss": 0.4861, + "step": 1407 + }, + { + "epoch": 1.74, + "learning_rate": 7.951275260594666e-06, + "loss": 0.562, + "step": 1408 + }, + { + "epoch": 1.74, + "learning_rate": 7.938247608013021e-06, + "loss": 0.5561, + "step": 1409 + }, + { + "epoch": 1.74, + "learning_rate": 7.925223608957205e-06, + "loss": 0.4738, + "step": 1410 + }, + { + "epoch": 1.74, + "learning_rate": 7.912203286506386e-06, + "loss": 0.6379, + "step": 1411 + }, + { + "epoch": 1.74, + "learning_rate": 7.899186663733204e-06, + "loss": 0.5213, + "step": 1412 + }, + { + "epoch": 1.74, + "learning_rate": 7.886173763703757e-06, + "loss": 0.5223, + "step": 1413 + }, + { + "epoch": 1.74, + "learning_rate": 7.873164609477537e-06, + "loss": 0.588, + "step": 1414 + }, + { + "epoch": 1.74, + "learning_rate": 7.860159224107397e-06, + "loss": 0.5685, + "step": 1415 + }, + { + "epoch": 1.75, + "learning_rate": 7.847157630639513e-06, + "loss": 0.5309, + "step": 1416 + }, + { + "epoch": 1.75, + "learning_rate": 7.834159852113347e-06, + "loss": 0.624, + "step": 1417 + }, + { + "epoch": 1.75, + "learning_rate": 7.821165911561596e-06, + "loss": 0.5482, + "step": 1418 + }, + { + "epoch": 1.75, + "learning_rate": 7.808175832010158e-06, + "loss": 0.4744, + "step": 1419 + }, + { + "epoch": 1.75, + "learning_rate": 7.795189636478084e-06, + "loss": 0.5059, + "step": 1420 + }, + { + "epoch": 1.75, + "learning_rate": 7.78220734797755e-06, + "loss": 0.528, + "step": 1421 + }, + { + "epoch": 1.75, + "learning_rate": 7.7692289895138e-06, + "loss": 0.5184, + "step": 1422 + }, + { + "epoch": 1.75, + "learning_rate": 7.756254584085121e-06, + "loss": 0.4594, + "step": 1423 + }, + { + "epoch": 1.76, + "learning_rate": 7.743284154682792e-06, + "loss": 0.4871, + "step": 1424 + }, + { + "epoch": 1.76, + "learning_rate": 7.73031772429105e-06, + "loss": 0.5061, + "step": 1425 + }, + { + "epoch": 1.76, + "learning_rate": 7.71735531588704e-06, + "loss": 0.5723, + "step": 1426 + }, + { + "epoch": 1.76, + "learning_rate": 7.704396952440778e-06, + "loss": 0.5852, + "step": 1427 + }, + { + "epoch": 1.76, + "learning_rate": 7.691442656915122e-06, + "loss": 0.5698, + "step": 1428 + }, + { + "epoch": 1.76, + "learning_rate": 7.678492452265713e-06, + "loss": 0.6188, + "step": 1429 + }, + { + "epoch": 1.76, + "learning_rate": 7.66554636144095e-06, + "loss": 0.5528, + "step": 1430 + }, + { + "epoch": 1.76, + "learning_rate": 7.652604407381927e-06, + "loss": 0.4962, + "step": 1431 + }, + { + "epoch": 1.77, + "learning_rate": 7.63966661302243e-06, + "loss": 0.5354, + "step": 1432 + }, + { + "epoch": 1.77, + "learning_rate": 7.626733001288852e-06, + "loss": 0.6037, + "step": 1433 + }, + { + "epoch": 1.77, + "learning_rate": 7.613803595100191e-06, + "loss": 0.5642, + "step": 1434 + }, + { + "epoch": 1.77, + "learning_rate": 7.600878417367986e-06, + "loss": 0.5552, + "step": 1435 + }, + { + "epoch": 1.77, + "learning_rate": 7.587957490996276e-06, + "loss": 0.5917, + "step": 1436 + }, + { + "epoch": 1.77, + "learning_rate": 7.575040838881578e-06, + "loss": 0.4955, + "step": 1437 + }, + { + "epoch": 1.77, + "learning_rate": 7.56212848391283e-06, + "loss": 0.4655, + "step": 1438 + }, + { + "epoch": 1.77, + "learning_rate": 7.5492204489713496e-06, + "loss": 0.5805, + "step": 1439 + }, + { + "epoch": 1.77, + "learning_rate": 7.536316756930811e-06, + "loss": 0.5269, + "step": 1440 + }, + { + "epoch": 1.78, + "learning_rate": 7.523417430657186e-06, + "loss": 0.5041, + "step": 1441 + }, + { + "epoch": 1.78, + "learning_rate": 7.510522493008703e-06, + "loss": 0.557, + "step": 1442 + }, + { + "epoch": 1.78, + "learning_rate": 7.497631966835828e-06, + "loss": 0.5283, + "step": 1443 + }, + { + "epoch": 1.78, + "learning_rate": 7.484745874981196e-06, + "loss": 0.5773, + "step": 1444 + }, + { + "epoch": 1.78, + "learning_rate": 7.471864240279598e-06, + "loss": 0.5015, + "step": 1445 + }, + { + "epoch": 1.78, + "learning_rate": 7.458987085557916e-06, + "loss": 0.6004, + "step": 1446 + }, + { + "epoch": 1.78, + "learning_rate": 7.446114433635094e-06, + "loss": 0.6328, + "step": 1447 + }, + { + "epoch": 1.78, + "learning_rate": 7.433246307322099e-06, + "loss": 0.4885, + "step": 1448 + }, + { + "epoch": 1.79, + "learning_rate": 7.420382729421883e-06, + "loss": 0.4946, + "step": 1449 + }, + { + "epoch": 1.79, + "learning_rate": 7.4075237227293285e-06, + "loss": 0.5858, + "step": 1450 + }, + { + "epoch": 1.79, + "learning_rate": 7.3946693100312305e-06, + "loss": 0.5818, + "step": 1451 + }, + { + "epoch": 1.79, + "learning_rate": 7.3818195141062286e-06, + "loss": 0.5666, + "step": 1452 + }, + { + "epoch": 1.79, + "learning_rate": 7.368974357724789e-06, + "loss": 0.51, + "step": 1453 + }, + { + "epoch": 1.79, + "learning_rate": 7.356133863649155e-06, + "loss": 0.6107, + "step": 1454 + }, + { + "epoch": 1.79, + "learning_rate": 7.343298054633314e-06, + "loss": 0.5256, + "step": 1455 + }, + { + "epoch": 1.79, + "learning_rate": 7.3304669534229424e-06, + "loss": 0.6266, + "step": 1456 + }, + { + "epoch": 1.8, + "learning_rate": 7.317640582755373e-06, + "loss": 0.4227, + "step": 1457 + }, + { + "epoch": 1.8, + "learning_rate": 7.304818965359567e-06, + "loss": 0.5468, + "step": 1458 + }, + { + "epoch": 1.8, + "learning_rate": 7.292002123956052e-06, + "loss": 0.547, + "step": 1459 + }, + { + "epoch": 1.8, + "learning_rate": 7.2791900812569e-06, + "loss": 0.6453, + "step": 1460 + }, + { + "epoch": 1.8, + "learning_rate": 7.266382859965673e-06, + "loss": 0.5326, + "step": 1461 + }, + { + "epoch": 1.8, + "learning_rate": 7.253580482777395e-06, + "loss": 0.4997, + "step": 1462 + }, + { + "epoch": 1.8, + "learning_rate": 7.2407829723784965e-06, + "loss": 0.5204, + "step": 1463 + }, + { + "epoch": 1.8, + "learning_rate": 7.227990351446797e-06, + "loss": 0.4646, + "step": 1464 + }, + { + "epoch": 1.81, + "learning_rate": 7.2152026426514395e-06, + "loss": 0.4634, + "step": 1465 + }, + { + "epoch": 1.81, + "learning_rate": 7.202419868652875e-06, + "loss": 0.5461, + "step": 1466 + }, + { + "epoch": 1.81, + "learning_rate": 7.189642052102799e-06, + "loss": 0.5295, + "step": 1467 + }, + { + "epoch": 1.81, + "learning_rate": 7.1768692156441225e-06, + "loss": 0.5827, + "step": 1468 + }, + { + "epoch": 1.81, + "learning_rate": 7.164101381910939e-06, + "loss": 0.505, + "step": 1469 + }, + { + "epoch": 1.81, + "learning_rate": 7.151338573528471e-06, + "loss": 0.5948, + "step": 1470 + }, + { + "epoch": 1.81, + "learning_rate": 7.138580813113038e-06, + "loss": 0.5359, + "step": 1471 + }, + { + "epoch": 1.81, + "learning_rate": 7.125828123272016e-06, + "loss": 0.5049, + "step": 1472 + }, + { + "epoch": 1.82, + "learning_rate": 7.113080526603793e-06, + "loss": 0.5161, + "step": 1473 + }, + { + "epoch": 1.82, + "learning_rate": 7.100338045697727e-06, + "loss": 0.5361, + "step": 1474 + }, + { + "epoch": 1.82, + "learning_rate": 7.087600703134123e-06, + "loss": 0.575, + "step": 1475 + }, + { + "epoch": 1.82, + "learning_rate": 7.074868521484167e-06, + "loss": 0.5481, + "step": 1476 + }, + { + "epoch": 1.82, + "learning_rate": 7.062141523309918e-06, + "loss": 0.5742, + "step": 1477 + }, + { + "epoch": 1.82, + "learning_rate": 7.049419731164221e-06, + "loss": 0.6022, + "step": 1478 + }, + { + "epoch": 1.82, + "learning_rate": 7.036703167590724e-06, + "loss": 0.5573, + "step": 1479 + }, + { + "epoch": 1.82, + "learning_rate": 7.023991855123793e-06, + "loss": 0.4864, + "step": 1480 + }, + { + "epoch": 1.83, + "learning_rate": 7.011285816288496e-06, + "loss": 0.5354, + "step": 1481 + }, + { + "epoch": 1.83, + "learning_rate": 6.998585073600552e-06, + "loss": 0.5219, + "step": 1482 + }, + { + "epoch": 1.83, + "learning_rate": 6.9858896495663046e-06, + "loss": 0.5856, + "step": 1483 + }, + { + "epoch": 1.83, + "learning_rate": 6.973199566682653e-06, + "loss": 0.4696, + "step": 1484 + }, + { + "epoch": 1.83, + "learning_rate": 6.96051484743705e-06, + "loss": 0.5239, + "step": 1485 + }, + { + "epoch": 1.83, + "learning_rate": 6.947835514307433e-06, + "loss": 0.5953, + "step": 1486 + }, + { + "epoch": 1.83, + "learning_rate": 6.935161589762204e-06, + "loss": 0.4928, + "step": 1487 + }, + { + "epoch": 1.83, + "learning_rate": 6.922493096260174e-06, + "loss": 0.5384, + "step": 1488 + }, + { + "epoch": 1.84, + "learning_rate": 6.909830056250527e-06, + "loss": 0.5605, + "step": 1489 + }, + { + "epoch": 1.84, + "learning_rate": 6.897172492172793e-06, + "loss": 0.5389, + "step": 1490 + }, + { + "epoch": 1.84, + "learning_rate": 6.88452042645679e-06, + "loss": 0.4586, + "step": 1491 + }, + { + "epoch": 1.84, + "learning_rate": 6.8718738815226e-06, + "loss": 0.5247, + "step": 1492 + }, + { + "epoch": 1.84, + "learning_rate": 6.859232879780515e-06, + "loss": 0.5356, + "step": 1493 + }, + { + "epoch": 1.84, + "learning_rate": 6.846597443631005e-06, + "loss": 0.5678, + "step": 1494 + }, + { + "epoch": 1.84, + "learning_rate": 6.833967595464679e-06, + "loss": 0.5726, + "step": 1495 + }, + { + "epoch": 1.84, + "learning_rate": 6.821343357662249e-06, + "loss": 0.5192, + "step": 1496 + }, + { + "epoch": 1.85, + "learning_rate": 6.8087247525944745e-06, + "loss": 0.5449, + "step": 1497 + }, + { + "epoch": 1.85, + "learning_rate": 6.796111802622148e-06, + "loss": 0.5564, + "step": 1498 + }, + { + "epoch": 1.85, + "learning_rate": 6.783504530096023e-06, + "loss": 0.562, + "step": 1499 + }, + { + "epoch": 1.85, + "learning_rate": 6.770902957356802e-06, + "loss": 0.5878, + "step": 1500 + }, + { + "epoch": 1.85, + "learning_rate": 6.758307106735094e-06, + "loss": 0.4945, + "step": 1501 + }, + { + "epoch": 1.85, + "learning_rate": 6.745717000551356e-06, + "loss": 0.6107, + "step": 1502 + }, + { + "epoch": 1.85, + "learning_rate": 6.73313266111587e-06, + "loss": 0.5483, + "step": 1503 + }, + { + "epoch": 1.85, + "learning_rate": 6.720554110728703e-06, + "loss": 0.602, + "step": 1504 + }, + { + "epoch": 1.86, + "learning_rate": 6.707981371679657e-06, + "loss": 0.5029, + "step": 1505 + }, + { + "epoch": 1.86, + "learning_rate": 6.6954144662482375e-06, + "loss": 0.6, + "step": 1506 + }, + { + "epoch": 1.86, + "learning_rate": 6.682853416703619e-06, + "loss": 0.5179, + "step": 1507 + }, + { + "epoch": 1.86, + "learning_rate": 6.670298245304587e-06, + "loss": 0.5245, + "step": 1508 + }, + { + "epoch": 1.86, + "learning_rate": 6.657748974299529e-06, + "loss": 0.5461, + "step": 1509 + }, + { + "epoch": 1.86, + "learning_rate": 6.645205625926354e-06, + "loss": 0.564, + "step": 1510 + }, + { + "epoch": 1.86, + "learning_rate": 6.6326682224124925e-06, + "loss": 0.4821, + "step": 1511 + }, + { + "epoch": 1.86, + "learning_rate": 6.620136785974834e-06, + "loss": 0.5846, + "step": 1512 + }, + { + "epoch": 1.86, + "learning_rate": 6.607611338819697e-06, + "loss": 0.512, + "step": 1513 + }, + { + "epoch": 1.87, + "learning_rate": 6.5950919031427874e-06, + "loss": 0.4794, + "step": 1514 + }, + { + "epoch": 1.87, + "learning_rate": 6.582578501129147e-06, + "loss": 0.4321, + "step": 1515 + }, + { + "epoch": 1.87, + "learning_rate": 6.5700711549531435e-06, + "loss": 0.4924, + "step": 1516 + }, + { + "epoch": 1.87, + "learning_rate": 6.557569886778401e-06, + "loss": 0.5929, + "step": 1517 + }, + { + "epoch": 1.87, + "learning_rate": 6.5450747187577745e-06, + "loss": 0.5654, + "step": 1518 + }, + { + "epoch": 1.87, + "learning_rate": 6.532585673033317e-06, + "loss": 0.5074, + "step": 1519 + }, + { + "epoch": 1.87, + "learning_rate": 6.520102771736225e-06, + "loss": 0.5781, + "step": 1520 + }, + { + "epoch": 1.87, + "learning_rate": 6.507626036986804e-06, + "loss": 0.5409, + "step": 1521 + }, + { + "epoch": 1.88, + "learning_rate": 6.495155490894442e-06, + "loss": 0.433, + "step": 1522 + }, + { + "epoch": 1.88, + "learning_rate": 6.48269115555755e-06, + "loss": 0.5725, + "step": 1523 + }, + { + "epoch": 1.88, + "learning_rate": 6.470233053063546e-06, + "loss": 0.5355, + "step": 1524 + }, + { + "epoch": 1.88, + "learning_rate": 6.457781205488791e-06, + "loss": 0.5495, + "step": 1525 + }, + { + "epoch": 1.88, + "learning_rate": 6.445335634898567e-06, + "loss": 0.5961, + "step": 1526 + }, + { + "epoch": 1.88, + "learning_rate": 6.432896363347028e-06, + "loss": 0.5973, + "step": 1527 + }, + { + "epoch": 1.88, + "learning_rate": 6.420463412877176e-06, + "loss": 0.5531, + "step": 1528 + }, + { + "epoch": 1.88, + "learning_rate": 6.408036805520801e-06, + "loss": 0.4906, + "step": 1529 + }, + { + "epoch": 1.89, + "learning_rate": 6.395616563298466e-06, + "loss": 0.527, + "step": 1530 + }, + { + "epoch": 1.89, + "learning_rate": 6.3832027082194385e-06, + "loss": 0.5394, + "step": 1531 + }, + { + "epoch": 1.89, + "learning_rate": 6.370795262281675e-06, + "loss": 0.6113, + "step": 1532 + }, + { + "epoch": 1.89, + "learning_rate": 6.358394247471779e-06, + "loss": 0.4755, + "step": 1533 + }, + { + "epoch": 1.89, + "learning_rate": 6.3459996857649516e-06, + "loss": 0.4681, + "step": 1534 + }, + { + "epoch": 1.89, + "learning_rate": 6.333611599124966e-06, + "loss": 0.573, + "step": 1535 + }, + { + "epoch": 1.89, + "learning_rate": 6.321230009504107e-06, + "loss": 0.5013, + "step": 1536 + }, + { + "epoch": 1.89, + "learning_rate": 6.308854938843161e-06, + "loss": 0.6231, + "step": 1537 + }, + { + "epoch": 1.9, + "learning_rate": 6.296486409071354e-06, + "loss": 0.5352, + "step": 1538 + }, + { + "epoch": 1.9, + "learning_rate": 6.28412444210633e-06, + "loss": 0.5859, + "step": 1539 + }, + { + "epoch": 1.9, + "learning_rate": 6.271769059854092e-06, + "loss": 0.453, + "step": 1540 + }, + { + "epoch": 1.9, + "learning_rate": 6.259420284208987e-06, + "loss": 0.5412, + "step": 1541 + }, + { + "epoch": 1.9, + "learning_rate": 6.247078137053637e-06, + "loss": 0.533, + "step": 1542 + }, + { + "epoch": 1.9, + "learning_rate": 6.234742640258938e-06, + "loss": 0.6573, + "step": 1543 + }, + { + "epoch": 1.9, + "learning_rate": 6.222413815683987e-06, + "loss": 0.5209, + "step": 1544 + }, + { + "epoch": 1.9, + "learning_rate": 6.210091685176067e-06, + "loss": 0.5536, + "step": 1545 + }, + { + "epoch": 1.91, + "learning_rate": 6.197776270570594e-06, + "loss": 0.5907, + "step": 1546 + }, + { + "epoch": 1.91, + "learning_rate": 6.185467593691081e-06, + "loss": 0.6496, + "step": 1547 + }, + { + "epoch": 1.91, + "learning_rate": 6.173165676349103e-06, + "loss": 0.571, + "step": 1548 + }, + { + "epoch": 1.91, + "learning_rate": 6.160870540344261e-06, + "loss": 0.575, + "step": 1549 + }, + { + "epoch": 1.91, + "learning_rate": 6.148582207464134e-06, + "loss": 0.4739, + "step": 1550 + }, + { + "epoch": 1.91, + "learning_rate": 6.13630069948425e-06, + "loss": 0.5767, + "step": 1551 + }, + { + "epoch": 1.91, + "learning_rate": 6.124026038168039e-06, + "loss": 0.4838, + "step": 1552 + }, + { + "epoch": 1.91, + "learning_rate": 6.111758245266795e-06, + "loss": 0.532, + "step": 1553 + }, + { + "epoch": 1.92, + "learning_rate": 6.099497342519651e-06, + "loss": 0.5599, + "step": 1554 + }, + { + "epoch": 1.92, + "learning_rate": 6.0872433516535225e-06, + "loss": 0.5179, + "step": 1555 + }, + { + "epoch": 1.92, + "learning_rate": 6.0749962943830865e-06, + "loss": 0.5736, + "step": 1556 + }, + { + "epoch": 1.92, + "learning_rate": 6.0627561924107145e-06, + "loss": 0.4923, + "step": 1557 + }, + { + "epoch": 1.92, + "learning_rate": 6.05052306742647e-06, + "loss": 0.5896, + "step": 1558 + }, + { + "epoch": 1.92, + "learning_rate": 6.038296941108046e-06, + "loss": 0.4628, + "step": 1559 + }, + { + "epoch": 1.92, + "learning_rate": 6.0260778351207386e-06, + "loss": 0.5315, + "step": 1560 + }, + { + "epoch": 1.92, + "learning_rate": 6.013865771117394e-06, + "loss": 0.584, + "step": 1561 + }, + { + "epoch": 1.93, + "learning_rate": 6.001660770738394e-06, + "loss": 0.5021, + "step": 1562 + }, + { + "epoch": 1.93, + "learning_rate": 5.989462855611585e-06, + "loss": 0.5782, + "step": 1563 + }, + { + "epoch": 1.93, + "learning_rate": 5.977272047352274e-06, + "loss": 0.3732, + "step": 1564 + }, + { + "epoch": 1.93, + "learning_rate": 5.965088367563162e-06, + "loss": 0.5936, + "step": 1565 + }, + { + "epoch": 1.93, + "learning_rate": 5.952911837834332e-06, + "loss": 0.5809, + "step": 1566 + }, + { + "epoch": 1.93, + "learning_rate": 5.940742479743186e-06, + "loss": 0.5276, + "step": 1567 + }, + { + "epoch": 1.93, + "learning_rate": 5.9285803148544155e-06, + "loss": 0.5872, + "step": 1568 + }, + { + "epoch": 1.93, + "learning_rate": 5.916425364719975e-06, + "loss": 0.6352, + "step": 1569 + }, + { + "epoch": 1.94, + "learning_rate": 5.904277650879027e-06, + "loss": 0.6446, + "step": 1570 + }, + { + "epoch": 1.94, + "learning_rate": 5.892137194857914e-06, + "loss": 0.5808, + "step": 1571 + }, + { + "epoch": 1.94, + "learning_rate": 5.88000401817012e-06, + "loss": 0.539, + "step": 1572 + }, + { + "epoch": 1.94, + "learning_rate": 5.867878142316221e-06, + "loss": 0.5012, + "step": 1573 + }, + { + "epoch": 1.94, + "learning_rate": 5.855759588783861e-06, + "loss": 0.5604, + "step": 1574 + }, + { + "epoch": 1.94, + "learning_rate": 5.843648379047708e-06, + "loss": 0.4624, + "step": 1575 + }, + { + "epoch": 1.94, + "learning_rate": 5.8315445345694196e-06, + "loss": 0.4802, + "step": 1576 + }, + { + "epoch": 1.94, + "learning_rate": 5.8194480767976e-06, + "loss": 0.6033, + "step": 1577 + }, + { + "epoch": 1.95, + "learning_rate": 5.807359027167753e-06, + "loss": 0.4447, + "step": 1578 + }, + { + "epoch": 1.95, + "learning_rate": 5.795277407102273e-06, + "loss": 0.5931, + "step": 1579 + }, + { + "epoch": 1.95, + "learning_rate": 5.783203238010382e-06, + "loss": 0.5294, + "step": 1580 + }, + { + "epoch": 1.95, + "learning_rate": 5.7711365412880895e-06, + "loss": 0.5798, + "step": 1581 + }, + { + "epoch": 1.95, + "learning_rate": 5.759077338318173e-06, + "loss": 0.5927, + "step": 1582 + }, + { + "epoch": 1.95, + "learning_rate": 5.747025650470135e-06, + "loss": 0.5056, + "step": 1583 + }, + { + "epoch": 1.95, + "learning_rate": 5.734981499100145e-06, + "loss": 0.5459, + "step": 1584 + }, + { + "epoch": 1.95, + "learning_rate": 5.7229449055510335e-06, + "loss": 0.5095, + "step": 1585 + }, + { + "epoch": 1.95, + "learning_rate": 5.710915891152227e-06, + "loss": 0.6088, + "step": 1586 + }, + { + "epoch": 1.96, + "learning_rate": 5.698894477219726e-06, + "loss": 0.5857, + "step": 1587 + }, + { + "epoch": 1.96, + "learning_rate": 5.68688068505607e-06, + "loss": 0.5074, + "step": 1588 + }, + { + "epoch": 1.96, + "learning_rate": 5.674874535950279e-06, + "loss": 0.5006, + "step": 1589 + }, + { + "epoch": 1.96, + "learning_rate": 5.662876051177831e-06, + "loss": 0.5483, + "step": 1590 + }, + { + "epoch": 1.96, + "learning_rate": 5.650885252000631e-06, + "loss": 0.6472, + "step": 1591 + }, + { + "epoch": 1.96, + "learning_rate": 5.638902159666962e-06, + "loss": 0.5702, + "step": 1592 + }, + { + "epoch": 1.96, + "learning_rate": 5.626926795411447e-06, + "loss": 0.5984, + "step": 1593 + }, + { + "epoch": 1.96, + "learning_rate": 5.614959180455016e-06, + "loss": 0.477, + "step": 1594 + }, + { + "epoch": 1.97, + "learning_rate": 5.602999336004862e-06, + "loss": 0.5572, + "step": 1595 + }, + { + "epoch": 1.97, + "learning_rate": 5.591047283254417e-06, + "loss": 0.5436, + "step": 1596 + }, + { + "epoch": 1.97, + "learning_rate": 5.579103043383305e-06, + "loss": 0.4919, + "step": 1597 + }, + { + "epoch": 1.97, + "learning_rate": 5.567166637557293e-06, + "loss": 0.5874, + "step": 1598 + }, + { + "epoch": 1.97, + "learning_rate": 5.5552380869282855e-06, + "loss": 0.5041, + "step": 1599 + }, + { + "epoch": 1.97, + "learning_rate": 5.543317412634244e-06, + "loss": 0.5455, + "step": 1600 + }, + { + "epoch": 1.97, + "learning_rate": 5.531404635799191e-06, + "loss": 0.5052, + "step": 1601 + }, + { + "epoch": 1.97, + "learning_rate": 5.519499777533154e-06, + "loss": 0.4949, + "step": 1602 + }, + { + "epoch": 1.98, + "learning_rate": 5.507602858932113e-06, + "loss": 0.5435, + "step": 1603 + }, + { + "epoch": 1.98, + "learning_rate": 5.495713901077995e-06, + "loss": 0.5928, + "step": 1604 + }, + { + "epoch": 1.98, + "learning_rate": 5.4838329250386076e-06, + "loss": 0.6195, + "step": 1605 + }, + { + "epoch": 1.98, + "learning_rate": 5.471959951867627e-06, + "loss": 0.5406, + "step": 1606 + }, + { + "epoch": 1.98, + "learning_rate": 5.460095002604533e-06, + "loss": 0.4779, + "step": 1607 + }, + { + "epoch": 1.98, + "learning_rate": 5.4482380982745985e-06, + "loss": 0.5761, + "step": 1608 + }, + { + "epoch": 1.98, + "learning_rate": 5.436389259888841e-06, + "loss": 0.518, + "step": 1609 + }, + { + "epoch": 1.98, + "learning_rate": 5.424548508443972e-06, + "loss": 0.5765, + "step": 1610 + }, + { + "epoch": 1.99, + "learning_rate": 5.412715864922389e-06, + "loss": 0.5839, + "step": 1611 + }, + { + "epoch": 1.99, + "learning_rate": 5.400891350292105e-06, + "loss": 0.5213, + "step": 1612 + }, + { + "epoch": 1.99, + "learning_rate": 5.38907498550674e-06, + "loss": 0.5741, + "step": 1613 + }, + { + "epoch": 1.99, + "learning_rate": 5.377266791505476e-06, + "loss": 0.5752, + "step": 1614 + }, + { + "epoch": 1.99, + "learning_rate": 5.365466789213001e-06, + "loss": 0.5054, + "step": 1615 + }, + { + "epoch": 1.99, + "learning_rate": 5.3536749995394945e-06, + "loss": 0.4276, + "step": 1616 + }, + { + "epoch": 1.99, + "learning_rate": 5.341891443380585e-06, + "loss": 0.608, + "step": 1617 + }, + { + "epoch": 1.99, + "learning_rate": 5.330116141617308e-06, + "loss": 0.5137, + "step": 1618 + }, + { + "epoch": 2.0, + "learning_rate": 5.318349115116079e-06, + "loss": 0.4922, + "step": 1619 + }, + { + "epoch": 2.0, + "learning_rate": 5.306590384728638e-06, + "loss": 0.5918, + "step": 1620 + }, + { + "epoch": 2.0, + "learning_rate": 5.294839971292026e-06, + "loss": 0.6453, + "step": 1621 + }, + { + "epoch": 2.0, + "learning_rate": 5.283097895628552e-06, + "loss": 0.5998, + "step": 1622 + }, + { + "epoch": 2.0, + "learning_rate": 5.2713641785457504e-06, + "loss": 0.4878, + "step": 1623 + }, + { + "epoch": 2.0, + "learning_rate": 5.259638840836332e-06, + "loss": 0.2957, + "step": 1624 + }, + { + "epoch": 2.0, + "learning_rate": 5.247921903278177e-06, + "loss": 0.2726, + "step": 1625 + }, + { + "epoch": 2.0, + "learning_rate": 5.2362133866342625e-06, + "loss": 0.2722, + "step": 1626 + }, + { + "epoch": 2.01, + "learning_rate": 5.224513311652654e-06, + "loss": 0.2436, + "step": 1627 + }, + { + "epoch": 2.01, + "learning_rate": 5.212821699066459e-06, + "loss": 0.2151, + "step": 1628 + }, + { + "epoch": 2.01, + "learning_rate": 5.20113856959378e-06, + "loss": 0.3148, + "step": 1629 + }, + { + "epoch": 2.01, + "learning_rate": 5.189463943937697e-06, + "loss": 0.2805, + "step": 1630 + }, + { + "epoch": 2.01, + "learning_rate": 5.1777978427862094e-06, + "loss": 0.2377, + "step": 1631 + }, + { + "epoch": 2.01, + "learning_rate": 5.166140286812227e-06, + "loss": 0.1958, + "step": 1632 + }, + { + "epoch": 2.01, + "learning_rate": 5.1544912966735e-06, + "loss": 0.2196, + "step": 1633 + }, + { + "epoch": 2.01, + "learning_rate": 5.142850893012609e-06, + "loss": 0.2997, + "step": 1634 + }, + { + "epoch": 2.02, + "learning_rate": 5.131219096456921e-06, + "loss": 0.2391, + "step": 1635 + }, + { + "epoch": 2.02, + "learning_rate": 5.119595927618546e-06, + "loss": 0.261, + "step": 1636 + }, + { + "epoch": 2.02, + "learning_rate": 5.1079814070943e-06, + "loss": 0.2806, + "step": 1637 + }, + { + "epoch": 2.02, + "learning_rate": 5.0963755554656856e-06, + "loss": 0.2571, + "step": 1638 + }, + { + "epoch": 2.02, + "learning_rate": 5.084778393298837e-06, + "loss": 0.3336, + "step": 1639 + }, + { + "epoch": 2.02, + "learning_rate": 5.073189941144495e-06, + "loss": 0.2405, + "step": 1640 + }, + { + "epoch": 2.02, + "learning_rate": 5.06161021953796e-06, + "loss": 0.2939, + "step": 1641 + }, + { + "epoch": 2.02, + "learning_rate": 5.050039248999057e-06, + "loss": 0.2555, + "step": 1642 + }, + { + "epoch": 2.03, + "learning_rate": 5.0384770500321175e-06, + "loss": 0.2215, + "step": 1643 + }, + { + "epoch": 2.03, + "learning_rate": 5.026923643125924e-06, + "loss": 0.2832, + "step": 1644 + }, + { + "epoch": 2.03, + "learning_rate": 5.015379048753669e-06, + "loss": 0.2368, + "step": 1645 + }, + { + "epoch": 2.03, + "learning_rate": 5.003843287372947e-06, + "loss": 0.2394, + "step": 1646 + }, + { + "epoch": 2.03, + "learning_rate": 4.9923163794256805e-06, + "loss": 0.2788, + "step": 1647 + }, + { + "epoch": 2.03, + "learning_rate": 4.980798345338117e-06, + "loss": 0.2493, + "step": 1648 + }, + { + "epoch": 2.03, + "learning_rate": 4.9692892055207784e-06, + "loss": 0.2366, + "step": 1649 + }, + { + "epoch": 2.03, + "learning_rate": 4.957788980368416e-06, + "loss": 0.2715, + "step": 1650 + }, + { + "epoch": 2.04, + "learning_rate": 4.9462976902599945e-06, + "loss": 0.2447, + "step": 1651 + }, + { + "epoch": 2.04, + "learning_rate": 4.934815355558636e-06, + "loss": 0.255, + "step": 1652 + }, + { + "epoch": 2.04, + "learning_rate": 4.923341996611604e-06, + "loss": 0.2384, + "step": 1653 + }, + { + "epoch": 2.04, + "learning_rate": 4.9118776337502425e-06, + "loss": 0.2678, + "step": 1654 + }, + { + "epoch": 2.04, + "learning_rate": 4.900422287289966e-06, + "loss": 0.2786, + "step": 1655 + }, + { + "epoch": 2.04, + "learning_rate": 4.888975977530212e-06, + "loss": 0.2506, + "step": 1656 + }, + { + "epoch": 2.04, + "learning_rate": 4.877538724754392e-06, + "loss": 0.217, + "step": 1657 + }, + { + "epoch": 2.04, + "learning_rate": 4.866110549229881e-06, + "loss": 0.2486, + "step": 1658 + }, + { + "epoch": 2.05, + "learning_rate": 4.85469147120796e-06, + "loss": 0.2525, + "step": 1659 + }, + { + "epoch": 2.05, + "learning_rate": 4.843281510923793e-06, + "loss": 0.2954, + "step": 1660 + }, + { + "epoch": 2.05, + "learning_rate": 4.831880688596392e-06, + "loss": 0.1827, + "step": 1661 + }, + { + "epoch": 2.05, + "learning_rate": 4.820489024428566e-06, + "loss": 0.213, + "step": 1662 + }, + { + "epoch": 2.05, + "learning_rate": 4.809106538606896e-06, + "loss": 0.2314, + "step": 1663 + }, + { + "epoch": 2.05, + "learning_rate": 4.797733251301705e-06, + "loss": 0.2488, + "step": 1664 + }, + { + "epoch": 2.05, + "learning_rate": 4.7863691826670146e-06, + "loss": 0.2319, + "step": 1665 + }, + { + "epoch": 2.05, + "learning_rate": 4.775014352840512e-06, + "loss": 0.2566, + "step": 1666 + }, + { + "epoch": 2.06, + "learning_rate": 4.7636687819435066e-06, + "loss": 0.2962, + "step": 1667 + }, + { + "epoch": 2.06, + "learning_rate": 4.7523324900808986e-06, + "loss": 0.257, + "step": 1668 + }, + { + "epoch": 2.06, + "learning_rate": 4.741005497341154e-06, + "loss": 0.29, + "step": 1669 + }, + { + "epoch": 2.06, + "learning_rate": 4.729687823796262e-06, + "loss": 0.2551, + "step": 1670 + }, + { + "epoch": 2.06, + "learning_rate": 4.718379489501682e-06, + "loss": 0.256, + "step": 1671 + }, + { + "epoch": 2.06, + "learning_rate": 4.707080514496345e-06, + "loss": 0.2352, + "step": 1672 + }, + { + "epoch": 2.06, + "learning_rate": 4.695790918802577e-06, + "loss": 0.1649, + "step": 1673 + }, + { + "epoch": 2.06, + "learning_rate": 4.684510722426094e-06, + "loss": 0.1883, + "step": 1674 + }, + { + "epoch": 2.07, + "learning_rate": 4.673239945355962e-06, + "loss": 0.2846, + "step": 1675 + }, + { + "epoch": 2.07, + "learning_rate": 4.661978607564538e-06, + "loss": 0.2612, + "step": 1676 + }, + { + "epoch": 2.07, + "learning_rate": 4.650726729007465e-06, + "loss": 0.2801, + "step": 1677 + }, + { + "epoch": 2.07, + "learning_rate": 4.639484329623627e-06, + "loss": 0.2749, + "step": 1678 + }, + { + "epoch": 2.07, + "learning_rate": 4.628251429335099e-06, + "loss": 0.2424, + "step": 1679 + }, + { + "epoch": 2.07, + "learning_rate": 4.617028048047124e-06, + "loss": 0.2574, + "step": 1680 + }, + { + "epoch": 2.07, + "learning_rate": 4.605814205648087e-06, + "loss": 0.2772, + "step": 1681 + }, + { + "epoch": 2.07, + "learning_rate": 4.594609922009462e-06, + "loss": 0.2159, + "step": 1682 + }, + { + "epoch": 2.08, + "learning_rate": 4.583415216985791e-06, + "loss": 0.2786, + "step": 1683 + }, + { + "epoch": 2.08, + "learning_rate": 4.572230110414633e-06, + "loss": 0.255, + "step": 1684 + }, + { + "epoch": 2.08, + "learning_rate": 4.56105462211654e-06, + "loss": 0.211, + "step": 1685 + }, + { + "epoch": 2.08, + "learning_rate": 4.5498887718950244e-06, + "loss": 0.2462, + "step": 1686 + }, + { + "epoch": 2.08, + "learning_rate": 4.538732579536523e-06, + "loss": 0.286, + "step": 1687 + }, + { + "epoch": 2.08, + "learning_rate": 4.5275860648103496e-06, + "loss": 0.246, + "step": 1688 + }, + { + "epoch": 2.08, + "learning_rate": 4.516449247468666e-06, + "loss": 0.3446, + "step": 1689 + }, + { + "epoch": 2.08, + "learning_rate": 4.505322147246463e-06, + "loss": 0.3438, + "step": 1690 + }, + { + "epoch": 2.09, + "learning_rate": 4.494204783861502e-06, + "loss": 0.2451, + "step": 1691 + }, + { + "epoch": 2.09, + "learning_rate": 4.4830971770142985e-06, + "loss": 0.2593, + "step": 1692 + }, + { + "epoch": 2.09, + "learning_rate": 4.4719993463880695e-06, + "loss": 0.2224, + "step": 1693 + }, + { + "epoch": 2.09, + "learning_rate": 4.460911311648709e-06, + "loss": 0.2472, + "step": 1694 + }, + { + "epoch": 2.09, + "learning_rate": 4.4498330924447596e-06, + "loss": 0.2201, + "step": 1695 + }, + { + "epoch": 2.09, + "learning_rate": 4.4387647084073695e-06, + "loss": 0.2325, + "step": 1696 + }, + { + "epoch": 2.09, + "learning_rate": 4.427706179150247e-06, + "loss": 0.3582, + "step": 1697 + }, + { + "epoch": 2.09, + "learning_rate": 4.416657524269652e-06, + "loss": 0.2418, + "step": 1698 + }, + { + "epoch": 2.09, + "learning_rate": 4.40561876334434e-06, + "loss": 0.2938, + "step": 1699 + }, + { + "epoch": 2.1, + "learning_rate": 4.394589915935533e-06, + "loss": 0.2141, + "step": 1700 + }, + { + "epoch": 2.1, + "learning_rate": 4.383571001586883e-06, + "loss": 0.216, + "step": 1701 + }, + { + "epoch": 2.1, + "learning_rate": 4.3725620398244454e-06, + "loss": 0.2751, + "step": 1702 + }, + { + "epoch": 2.1, + "learning_rate": 4.361563050156639e-06, + "loss": 0.2232, + "step": 1703 + }, + { + "epoch": 2.1, + "learning_rate": 4.3505740520742134e-06, + "loss": 0.2805, + "step": 1704 + }, + { + "epoch": 2.1, + "learning_rate": 4.339595065050206e-06, + "loss": 0.2571, + "step": 1705 + }, + { + "epoch": 2.1, + "learning_rate": 4.328626108539914e-06, + "loss": 0.3001, + "step": 1706 + }, + { + "epoch": 2.1, + "learning_rate": 4.317667201980868e-06, + "loss": 0.1833, + "step": 1707 + }, + { + "epoch": 2.11, + "learning_rate": 4.3067183647927855e-06, + "loss": 0.2197, + "step": 1708 + }, + { + "epoch": 2.11, + "learning_rate": 4.29577961637754e-06, + "loss": 0.274, + "step": 1709 + }, + { + "epoch": 2.11, + "learning_rate": 4.284850976119121e-06, + "loss": 0.2418, + "step": 1710 + }, + { + "epoch": 2.11, + "learning_rate": 4.273932463383619e-06, + "loss": 0.2115, + "step": 1711 + }, + { + "epoch": 2.11, + "learning_rate": 4.2630240975191695e-06, + "loss": 0.2552, + "step": 1712 + }, + { + "epoch": 2.11, + "learning_rate": 4.2521258978559324e-06, + "loss": 0.2395, + "step": 1713 + }, + { + "epoch": 2.11, + "learning_rate": 4.2412378837060465e-06, + "loss": 0.2751, + "step": 1714 + }, + { + "epoch": 2.11, + "learning_rate": 4.2303600743636e-06, + "loss": 0.2147, + "step": 1715 + }, + { + "epoch": 2.12, + "learning_rate": 4.219492489104604e-06, + "loss": 0.2296, + "step": 1716 + }, + { + "epoch": 2.12, + "learning_rate": 4.208635147186956e-06, + "loss": 0.249, + "step": 1717 + }, + { + "epoch": 2.12, + "learning_rate": 4.197788067850388e-06, + "loss": 0.203, + "step": 1718 + }, + { + "epoch": 2.12, + "learning_rate": 4.186951270316455e-06, + "loss": 0.2284, + "step": 1719 + }, + { + "epoch": 2.12, + "learning_rate": 4.176124773788497e-06, + "loss": 0.2475, + "step": 1720 + }, + { + "epoch": 2.12, + "learning_rate": 4.165308597451586e-06, + "loss": 0.2101, + "step": 1721 + }, + { + "epoch": 2.12, + "learning_rate": 4.154502760472522e-06, + "loss": 0.2283, + "step": 1722 + }, + { + "epoch": 2.12, + "learning_rate": 4.143707281999767e-06, + "loss": 0.2718, + "step": 1723 + }, + { + "epoch": 2.13, + "learning_rate": 4.13292218116344e-06, + "loss": 0.3606, + "step": 1724 + }, + { + "epoch": 2.13, + "learning_rate": 4.12214747707527e-06, + "loss": 0.2531, + "step": 1725 + }, + { + "epoch": 2.13, + "learning_rate": 4.111383188828553e-06, + "loss": 0.2605, + "step": 1726 + }, + { + "epoch": 2.13, + "learning_rate": 4.100629335498131e-06, + "loss": 0.2688, + "step": 1727 + }, + { + "epoch": 2.13, + "learning_rate": 4.0898859361403595e-06, + "loss": 0.2588, + "step": 1728 + }, + { + "epoch": 2.13, + "learning_rate": 4.079153009793068e-06, + "loss": 0.2346, + "step": 1729 + }, + { + "epoch": 2.13, + "learning_rate": 4.068430575475526e-06, + "loss": 0.2185, + "step": 1730 + }, + { + "epoch": 2.13, + "learning_rate": 4.057718652188409e-06, + "loss": 0.21, + "step": 1731 + }, + { + "epoch": 2.14, + "learning_rate": 4.047017258913765e-06, + "loss": 0.1869, + "step": 1732 + }, + { + "epoch": 2.14, + "learning_rate": 4.036326414614985e-06, + "loss": 0.2041, + "step": 1733 + }, + { + "epoch": 2.14, + "learning_rate": 4.025646138236774e-06, + "loss": 0.2325, + "step": 1734 + }, + { + "epoch": 2.14, + "learning_rate": 4.014976448705095e-06, + "loss": 0.2509, + "step": 1735 + }, + { + "epoch": 2.14, + "learning_rate": 4.004317364927164e-06, + "loss": 0.2113, + "step": 1736 + }, + { + "epoch": 2.14, + "learning_rate": 3.99366890579139e-06, + "loss": 0.2511, + "step": 1737 + }, + { + "epoch": 2.14, + "learning_rate": 3.983031090167368e-06, + "loss": 0.2316, + "step": 1738 + }, + { + "epoch": 2.14, + "learning_rate": 3.97240393690583e-06, + "loss": 0.2598, + "step": 1739 + }, + { + "epoch": 2.15, + "learning_rate": 3.9617874648386e-06, + "loss": 0.242, + "step": 1740 + }, + { + "epoch": 2.15, + "learning_rate": 3.951181692778594e-06, + "loss": 0.2533, + "step": 1741 + }, + { + "epoch": 2.15, + "learning_rate": 3.94058663951975e-06, + "loss": 0.2956, + "step": 1742 + }, + { + "epoch": 2.15, + "learning_rate": 3.930002323837026e-06, + "loss": 0.1934, + "step": 1743 + }, + { + "epoch": 2.15, + "learning_rate": 3.919428764486338e-06, + "loss": 0.2516, + "step": 1744 + }, + { + "epoch": 2.15, + "learning_rate": 3.908865980204555e-06, + "loss": 0.2403, + "step": 1745 + }, + { + "epoch": 2.15, + "learning_rate": 3.898313989709447e-06, + "loss": 0.2412, + "step": 1746 + }, + { + "epoch": 2.15, + "learning_rate": 3.88777281169965e-06, + "loss": 0.2444, + "step": 1747 + }, + { + "epoch": 2.16, + "learning_rate": 3.877242464854654e-06, + "loss": 0.2781, + "step": 1748 + }, + { + "epoch": 2.16, + "learning_rate": 3.86672296783474e-06, + "loss": 0.2056, + "step": 1749 + }, + { + "epoch": 2.16, + "learning_rate": 3.85621433928097e-06, + "loss": 0.2467, + "step": 1750 + }, + { + "epoch": 2.16, + "learning_rate": 3.845716597815154e-06, + "loss": 0.2356, + "step": 1751 + }, + { + "epoch": 2.16, + "learning_rate": 3.835229762039798e-06, + "loss": 0.2284, + "step": 1752 + }, + { + "epoch": 2.16, + "learning_rate": 3.824753850538082e-06, + "loss": 0.2827, + "step": 1753 + }, + { + "epoch": 2.16, + "learning_rate": 3.8142888818738367e-06, + "loss": 0.2489, + "step": 1754 + }, + { + "epoch": 2.16, + "learning_rate": 3.8038348745914966e-06, + "loss": 0.2418, + "step": 1755 + }, + { + "epoch": 2.17, + "learning_rate": 3.7933918472160757e-06, + "loss": 0.2556, + "step": 1756 + }, + { + "epoch": 2.17, + "learning_rate": 3.782959818253126e-06, + "loss": 0.2066, + "step": 1757 + }, + { + "epoch": 2.17, + "learning_rate": 3.7725388061887056e-06, + "loss": 0.2399, + "step": 1758 + }, + { + "epoch": 2.17, + "learning_rate": 3.7621288294893634e-06, + "loss": 0.2123, + "step": 1759 + }, + { + "epoch": 2.17, + "learning_rate": 3.7517299066020874e-06, + "loss": 0.2127, + "step": 1760 + }, + { + "epoch": 2.17, + "learning_rate": 3.741342055954269e-06, + "loss": 0.2767, + "step": 1761 + }, + { + "epoch": 2.17, + "learning_rate": 3.7309652959536947e-06, + "loss": 0.1938, + "step": 1762 + }, + { + "epoch": 2.17, + "learning_rate": 3.720599644988482e-06, + "loss": 0.3144, + "step": 1763 + }, + { + "epoch": 2.17, + "learning_rate": 3.7102451214270776e-06, + "loss": 0.1957, + "step": 1764 + }, + { + "epoch": 2.18, + "learning_rate": 3.699901743618194e-06, + "loss": 0.2044, + "step": 1765 + }, + { + "epoch": 2.18, + "learning_rate": 3.689569529890805e-06, + "loss": 0.214, + "step": 1766 + }, + { + "epoch": 2.18, + "learning_rate": 3.6792484985541034e-06, + "loss": 0.2696, + "step": 1767 + }, + { + "epoch": 2.18, + "learning_rate": 3.6689386678974504e-06, + "loss": 0.25, + "step": 1768 + }, + { + "epoch": 2.18, + "learning_rate": 3.658640056190378e-06, + "loss": 0.255, + "step": 1769 + }, + { + "epoch": 2.18, + "learning_rate": 3.64835268168252e-06, + "loss": 0.198, + "step": 1770 + }, + { + "epoch": 2.18, + "learning_rate": 3.6380765626036095e-06, + "loss": 0.2871, + "step": 1771 + }, + { + "epoch": 2.18, + "learning_rate": 3.6278117171634366e-06, + "loss": 0.2597, + "step": 1772 + }, + { + "epoch": 2.19, + "learning_rate": 3.617558163551802e-06, + "loss": 0.2177, + "step": 1773 + }, + { + "epoch": 2.19, + "learning_rate": 3.607315919938501e-06, + "loss": 0.2662, + "step": 1774 + }, + { + "epoch": 2.19, + "learning_rate": 3.597085004473293e-06, + "loss": 0.2458, + "step": 1775 + }, + { + "epoch": 2.19, + "learning_rate": 3.586865435285858e-06, + "loss": 0.296, + "step": 1776 + }, + { + "epoch": 2.19, + "learning_rate": 3.576657230485775e-06, + "loss": 0.25, + "step": 1777 + }, + { + "epoch": 2.19, + "learning_rate": 3.5664604081624787e-06, + "loss": 0.2613, + "step": 1778 + }, + { + "epoch": 2.19, + "learning_rate": 3.556274986385231e-06, + "loss": 0.2094, + "step": 1779 + }, + { + "epoch": 2.19, + "learning_rate": 3.546100983203099e-06, + "loss": 0.1816, + "step": 1780 + }, + { + "epoch": 2.2, + "learning_rate": 3.5359384166449185e-06, + "loss": 0.2269, + "step": 1781 + }, + { + "epoch": 2.2, + "learning_rate": 3.5257873047192448e-06, + "loss": 0.2249, + "step": 1782 + }, + { + "epoch": 2.2, + "learning_rate": 3.51564766541435e-06, + "loss": 0.237, + "step": 1783 + }, + { + "epoch": 2.2, + "learning_rate": 3.505519516698165e-06, + "loss": 0.3057, + "step": 1784 + }, + { + "epoch": 2.2, + "learning_rate": 3.4954028765182633e-06, + "loss": 0.3133, + "step": 1785 + }, + { + "epoch": 2.2, + "learning_rate": 3.4852977628018323e-06, + "loss": 0.3499, + "step": 1786 + }, + { + "epoch": 2.2, + "learning_rate": 3.475204193455618e-06, + "loss": 0.2414, + "step": 1787 + }, + { + "epoch": 2.2, + "learning_rate": 3.4651221863659236e-06, + "loss": 0.302, + "step": 1788 + }, + { + "epoch": 2.21, + "learning_rate": 3.4550517593985512e-06, + "loss": 0.2449, + "step": 1789 + }, + { + "epoch": 2.21, + "learning_rate": 3.4449929303987963e-06, + "loss": 0.1947, + "step": 1790 + }, + { + "epoch": 2.21, + "learning_rate": 3.434945717191388e-06, + "loss": 0.2294, + "step": 1791 + }, + { + "epoch": 2.21, + "learning_rate": 3.4249101375804804e-06, + "loss": 0.2958, + "step": 1792 + }, + { + "epoch": 2.21, + "learning_rate": 3.414886209349615e-06, + "loss": 0.2597, + "step": 1793 + }, + { + "epoch": 2.21, + "learning_rate": 3.4048739502616747e-06, + "loss": 0.2176, + "step": 1794 + }, + { + "epoch": 2.21, + "learning_rate": 3.394873378058876e-06, + "loss": 0.2219, + "step": 1795 + }, + { + "epoch": 2.21, + "learning_rate": 3.384884510462717e-06, + "loss": 0.2022, + "step": 1796 + }, + { + "epoch": 2.22, + "learning_rate": 3.3749073651739594e-06, + "loss": 0.2298, + "step": 1797 + }, + { + "epoch": 2.22, + "learning_rate": 3.3649419598725964e-06, + "loss": 0.2568, + "step": 1798 + }, + { + "epoch": 2.22, + "learning_rate": 3.3549883122178086e-06, + "loss": 0.3431, + "step": 1799 + }, + { + "epoch": 2.22, + "learning_rate": 3.345046439847941e-06, + "loss": 0.2336, + "step": 1800 + }, + { + "epoch": 2.22, + "learning_rate": 3.3351163603804805e-06, + "loss": 0.2458, + "step": 1801 + }, + { + "epoch": 2.22, + "learning_rate": 3.325198091412013e-06, + "loss": 0.2538, + "step": 1802 + }, + { + "epoch": 2.22, + "learning_rate": 3.3152916505181976e-06, + "loss": 0.2347, + "step": 1803 + }, + { + "epoch": 2.22, + "learning_rate": 3.3053970552537285e-06, + "loss": 0.2661, + "step": 1804 + }, + { + "epoch": 2.23, + "learning_rate": 3.2955143231523067e-06, + "loss": 0.236, + "step": 1805 + }, + { + "epoch": 2.23, + "learning_rate": 3.2856434717266193e-06, + "loss": 0.2925, + "step": 1806 + }, + { + "epoch": 2.23, + "learning_rate": 3.2757845184683e-06, + "loss": 0.3279, + "step": 1807 + }, + { + "epoch": 2.23, + "learning_rate": 3.2659374808478892e-06, + "loss": 0.1701, + "step": 1808 + }, + { + "epoch": 2.23, + "learning_rate": 3.2561023763148237e-06, + "loss": 0.2239, + "step": 1809 + }, + { + "epoch": 2.23, + "learning_rate": 3.2462792222973826e-06, + "loss": 0.2376, + "step": 1810 + }, + { + "epoch": 2.23, + "learning_rate": 3.2364680362026767e-06, + "loss": 0.2611, + "step": 1811 + }, + { + "epoch": 2.23, + "learning_rate": 3.2266688354166107e-06, + "loss": 0.2332, + "step": 1812 + }, + { + "epoch": 2.24, + "learning_rate": 3.216881637303839e-06, + "loss": 0.2581, + "step": 1813 + }, + { + "epoch": 2.24, + "learning_rate": 3.207106459207758e-06, + "loss": 0.3197, + "step": 1814 + }, + { + "epoch": 2.24, + "learning_rate": 3.1973433184504632e-06, + "loss": 0.204, + "step": 1815 + }, + { + "epoch": 2.24, + "learning_rate": 3.1875922323327137e-06, + "loss": 0.2729, + "step": 1816 + }, + { + "epoch": 2.24, + "learning_rate": 3.177853218133905e-06, + "loss": 0.2573, + "step": 1817 + }, + { + "epoch": 2.24, + "learning_rate": 3.1681262931120504e-06, + "loss": 0.2006, + "step": 1818 + }, + { + "epoch": 2.24, + "learning_rate": 3.158411474503735e-06, + "loss": 0.2914, + "step": 1819 + }, + { + "epoch": 2.24, + "learning_rate": 3.1487087795240976e-06, + "loss": 0.222, + "step": 1820 + }, + { + "epoch": 2.25, + "learning_rate": 3.1390182253667745e-06, + "loss": 0.2342, + "step": 1821 + }, + { + "epoch": 2.25, + "learning_rate": 3.1293398292039077e-06, + "loss": 0.3162, + "step": 1822 + }, + { + "epoch": 2.25, + "learning_rate": 3.1196736081860855e-06, + "loss": 0.2711, + "step": 1823 + }, + { + "epoch": 2.25, + "learning_rate": 3.110019579442328e-06, + "loss": 0.2345, + "step": 1824 + }, + { + "epoch": 2.25, + "learning_rate": 3.100377760080041e-06, + "loss": 0.2454, + "step": 1825 + }, + { + "epoch": 2.25, + "learning_rate": 3.090748167184997e-06, + "loss": 0.2103, + "step": 1826 + }, + { + "epoch": 2.25, + "learning_rate": 3.0811308178213063e-06, + "loss": 0.1984, + "step": 1827 + }, + { + "epoch": 2.25, + "learning_rate": 3.0715257290313836e-06, + "loss": 0.2253, + "step": 1828 + }, + { + "epoch": 2.26, + "learning_rate": 3.0619329178359103e-06, + "loss": 0.2281, + "step": 1829 + }, + { + "epoch": 2.26, + "learning_rate": 3.0523524012338224e-06, + "loss": 0.2742, + "step": 1830 + }, + { + "epoch": 2.26, + "learning_rate": 3.042784196202255e-06, + "loss": 0.2047, + "step": 1831 + }, + { + "epoch": 2.26, + "learning_rate": 3.0332283196965384e-06, + "loss": 0.2053, + "step": 1832 + }, + { + "epoch": 2.26, + "learning_rate": 3.023684788650154e-06, + "loss": 0.2267, + "step": 1833 + }, + { + "epoch": 2.26, + "learning_rate": 3.0141536199747e-06, + "loss": 0.2476, + "step": 1834 + }, + { + "epoch": 2.26, + "learning_rate": 3.004634830559874e-06, + "loss": 0.2302, + "step": 1835 + }, + { + "epoch": 2.26, + "learning_rate": 2.9951284372734392e-06, + "loss": 0.2569, + "step": 1836 + }, + { + "epoch": 2.26, + "learning_rate": 2.985634456961184e-06, + "loss": 0.2088, + "step": 1837 + }, + { + "epoch": 2.27, + "learning_rate": 2.976152906446903e-06, + "loss": 0.1999, + "step": 1838 + }, + { + "epoch": 2.27, + "learning_rate": 2.9666838025323685e-06, + "loss": 0.1999, + "step": 1839 + }, + { + "epoch": 2.27, + "learning_rate": 2.9572271619972957e-06, + "loss": 0.2858, + "step": 1840 + }, + { + "epoch": 2.27, + "learning_rate": 2.947783001599315e-06, + "loss": 0.2001, + "step": 1841 + }, + { + "epoch": 2.27, + "learning_rate": 2.938351338073937e-06, + "loss": 0.2247, + "step": 1842 + }, + { + "epoch": 2.27, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.2162, + "step": 1843 + }, + { + "epoch": 2.27, + "learning_rate": 2.9195255684722778e-06, + "loss": 0.2433, + "step": 1844 + }, + { + "epoch": 2.27, + "learning_rate": 2.9101314957561864e-06, + "loss": 0.2246, + "step": 1845 + }, + { + "epoch": 2.28, + "learning_rate": 2.9007499866330037e-06, + "loss": 0.3401, + "step": 1846 + }, + { + "epoch": 2.28, + "learning_rate": 2.891381057727216e-06, + "loss": 0.2007, + "step": 1847 + }, + { + "epoch": 2.28, + "learning_rate": 2.8820247256410272e-06, + "loss": 0.2161, + "step": 1848 + }, + { + "epoch": 2.28, + "learning_rate": 2.8726810069543156e-06, + "loss": 0.3225, + "step": 1849 + }, + { + "epoch": 2.28, + "learning_rate": 2.863349918224607e-06, + "loss": 0.1747, + "step": 1850 + }, + { + "epoch": 2.28, + "learning_rate": 2.8540314759870446e-06, + "loss": 0.2043, + "step": 1851 + }, + { + "epoch": 2.28, + "learning_rate": 2.844725696754359e-06, + "loss": 0.2779, + "step": 1852 + }, + { + "epoch": 2.28, + "learning_rate": 2.8354325970168483e-06, + "loss": 0.2291, + "step": 1853 + }, + { + "epoch": 2.29, + "learning_rate": 2.826152193242342e-06, + "loss": 0.2322, + "step": 1854 + }, + { + "epoch": 2.29, + "learning_rate": 2.8168845018761616e-06, + "loss": 0.2788, + "step": 1855 + }, + { + "epoch": 2.29, + "learning_rate": 2.8076295393411126e-06, + "loss": 0.2279, + "step": 1856 + }, + { + "epoch": 2.29, + "learning_rate": 2.7983873220374415e-06, + "loss": 0.2413, + "step": 1857 + }, + { + "epoch": 2.29, + "learning_rate": 2.7891578663428033e-06, + "loss": 0.2602, + "step": 1858 + }, + { + "epoch": 2.29, + "learning_rate": 2.7799411886122496e-06, + "loss": 0.238, + "step": 1859 + }, + { + "epoch": 2.29, + "learning_rate": 2.770737305178176e-06, + "loss": 0.2843, + "step": 1860 + }, + { + "epoch": 2.29, + "learning_rate": 2.7615462323503186e-06, + "loss": 0.2834, + "step": 1861 + }, + { + "epoch": 2.3, + "learning_rate": 2.7523679864157083e-06, + "loss": 0.2222, + "step": 1862 + }, + { + "epoch": 2.3, + "learning_rate": 2.7432025836386412e-06, + "loss": 0.2896, + "step": 1863 + }, + { + "epoch": 2.3, + "learning_rate": 2.734050040260655e-06, + "loss": 0.2222, + "step": 1864 + }, + { + "epoch": 2.3, + "learning_rate": 2.724910372500508e-06, + "loss": 0.3001, + "step": 1865 + }, + { + "epoch": 2.3, + "learning_rate": 2.715783596554136e-06, + "loss": 0.1785, + "step": 1866 + }, + { + "epoch": 2.3, + "learning_rate": 2.7066697285946376e-06, + "loss": 0.2327, + "step": 1867 + }, + { + "epoch": 2.3, + "learning_rate": 2.6975687847722197e-06, + "loss": 0.2744, + "step": 1868 + }, + { + "epoch": 2.3, + "learning_rate": 2.6884807812142043e-06, + "loss": 0.2375, + "step": 1869 + }, + { + "epoch": 2.31, + "learning_rate": 2.679405734024977e-06, + "loss": 0.2049, + "step": 1870 + }, + { + "epoch": 2.31, + "learning_rate": 2.670343659285968e-06, + "loss": 0.243, + "step": 1871 + }, + { + "epoch": 2.31, + "learning_rate": 2.6612945730556115e-06, + "loss": 0.2648, + "step": 1872 + }, + { + "epoch": 2.31, + "learning_rate": 2.6522584913693295e-06, + "loss": 0.2579, + "step": 1873 + }, + { + "epoch": 2.31, + "learning_rate": 2.643235430239499e-06, + "loss": 0.2406, + "step": 1874 + }, + { + "epoch": 2.31, + "learning_rate": 2.6342254056554306e-06, + "loss": 0.2848, + "step": 1875 + }, + { + "epoch": 2.31, + "learning_rate": 2.62522843358332e-06, + "loss": 0.2269, + "step": 1876 + }, + { + "epoch": 2.31, + "learning_rate": 2.616244529966244e-06, + "loss": 0.2311, + "step": 1877 + }, + { + "epoch": 2.32, + "learning_rate": 2.607273710724121e-06, + "loss": 0.1959, + "step": 1878 + }, + { + "epoch": 2.32, + "learning_rate": 2.598315991753675e-06, + "loss": 0.2513, + "step": 1879 + }, + { + "epoch": 2.32, + "learning_rate": 2.5893713889284257e-06, + "loss": 0.2659, + "step": 1880 + }, + { + "epoch": 2.32, + "learning_rate": 2.5804399180986417e-06, + "loss": 0.2592, + "step": 1881 + }, + { + "epoch": 2.32, + "learning_rate": 2.5715215950913253e-06, + "loss": 0.1922, + "step": 1882 + }, + { + "epoch": 2.32, + "learning_rate": 2.5626164357101857e-06, + "loss": 0.2214, + "step": 1883 + }, + { + "epoch": 2.32, + "learning_rate": 2.5537244557355965e-06, + "loss": 0.2318, + "step": 1884 + }, + { + "epoch": 2.32, + "learning_rate": 2.544845670924575e-06, + "loss": 0.2836, + "step": 1885 + }, + { + "epoch": 2.33, + "learning_rate": 2.5359800970107663e-06, + "loss": 0.2222, + "step": 1886 + }, + { + "epoch": 2.33, + "learning_rate": 2.527127749704399e-06, + "loss": 0.2877, + "step": 1887 + }, + { + "epoch": 2.33, + "learning_rate": 2.5182886446922673e-06, + "loss": 0.215, + "step": 1888 + }, + { + "epoch": 2.33, + "learning_rate": 2.509462797637693e-06, + "loss": 0.1795, + "step": 1889 + }, + { + "epoch": 2.33, + "learning_rate": 2.5006502241805064e-06, + "loss": 0.3077, + "step": 1890 + }, + { + "epoch": 2.33, + "learning_rate": 2.4918509399370194e-06, + "loss": 0.3146, + "step": 1891 + }, + { + "epoch": 2.33, + "learning_rate": 2.4830649604999967e-06, + "loss": 0.2538, + "step": 1892 + }, + { + "epoch": 2.33, + "learning_rate": 2.4742923014386154e-06, + "loss": 0.3021, + "step": 1893 + }, + { + "epoch": 2.34, + "learning_rate": 2.4655329782984617e-06, + "loss": 0.2217, + "step": 1894 + }, + { + "epoch": 2.34, + "learning_rate": 2.4567870066014785e-06, + "loss": 0.2357, + "step": 1895 + }, + { + "epoch": 2.34, + "learning_rate": 2.448054401845957e-06, + "loss": 0.2525, + "step": 1896 + }, + { + "epoch": 2.34, + "learning_rate": 2.4393351795065023e-06, + "loss": 0.1937, + "step": 1897 + }, + { + "epoch": 2.34, + "learning_rate": 2.4306293550339943e-06, + "loss": 0.3099, + "step": 1898 + }, + { + "epoch": 2.34, + "learning_rate": 2.421936943855586e-06, + "loss": 0.2315, + "step": 1899 + }, + { + "epoch": 2.34, + "learning_rate": 2.4132579613746475e-06, + "loss": 0.2648, + "step": 1900 + }, + { + "epoch": 2.34, + "learning_rate": 2.4045924229707663e-06, + "loss": 0.2625, + "step": 1901 + }, + { + "epoch": 2.34, + "learning_rate": 2.395940343999691e-06, + "loss": 0.2323, + "step": 1902 + }, + { + "epoch": 2.35, + "learning_rate": 2.387301739793333e-06, + "loss": 0.256, + "step": 1903 + }, + { + "epoch": 2.35, + "learning_rate": 2.3786766256597226e-06, + "loss": 0.2832, + "step": 1904 + }, + { + "epoch": 2.35, + "learning_rate": 2.3700650168829765e-06, + "loss": 0.2305, + "step": 1905 + }, + { + "epoch": 2.35, + "learning_rate": 2.361466928723293e-06, + "loss": 0.2059, + "step": 1906 + }, + { + "epoch": 2.35, + "learning_rate": 2.352882376416895e-06, + "loss": 0.2269, + "step": 1907 + }, + { + "epoch": 2.35, + "learning_rate": 2.344311375176034e-06, + "loss": 0.1862, + "step": 1908 + }, + { + "epoch": 2.35, + "learning_rate": 2.3357539401889438e-06, + "loss": 0.1913, + "step": 1909 + }, + { + "epoch": 2.35, + "learning_rate": 2.3272100866198133e-06, + "loss": 0.3016, + "step": 1910 + }, + { + "epoch": 2.36, + "learning_rate": 2.3186798296087663e-06, + "loss": 0.2021, + "step": 1911 + }, + { + "epoch": 2.36, + "learning_rate": 2.3101631842718376e-06, + "loss": 0.2962, + "step": 1912 + }, + { + "epoch": 2.36, + "learning_rate": 2.3016601657009364e-06, + "loss": 0.2406, + "step": 1913 + }, + { + "epoch": 2.36, + "learning_rate": 2.293170788963831e-06, + "loss": 0.2052, + "step": 1914 + }, + { + "epoch": 2.36, + "learning_rate": 2.284695069104107e-06, + "loss": 0.2693, + "step": 1915 + }, + { + "epoch": 2.36, + "learning_rate": 2.2762330211411523e-06, + "loss": 0.2062, + "step": 1916 + }, + { + "epoch": 2.36, + "learning_rate": 2.2677846600701305e-06, + "loss": 0.2078, + "step": 1917 + }, + { + "epoch": 2.36, + "learning_rate": 2.259350000861952e-06, + "loss": 0.2344, + "step": 1918 + }, + { + "epoch": 2.37, + "learning_rate": 2.2509290584632394e-06, + "loss": 0.2208, + "step": 1919 + }, + { + "epoch": 2.37, + "learning_rate": 2.2425218477963197e-06, + "loss": 0.2057, + "step": 1920 + }, + { + "epoch": 2.37, + "learning_rate": 2.234128383759174e-06, + "loss": 0.1667, + "step": 1921 + }, + { + "epoch": 2.37, + "learning_rate": 2.2257486812254336e-06, + "loss": 0.2648, + "step": 1922 + }, + { + "epoch": 2.37, + "learning_rate": 2.2173827550443417e-06, + "loss": 0.3612, + "step": 1923 + }, + { + "epoch": 2.37, + "learning_rate": 2.209030620040723e-06, + "loss": 0.2257, + "step": 1924 + }, + { + "epoch": 2.37, + "learning_rate": 2.2006922910149743e-06, + "loss": 0.1899, + "step": 1925 + }, + { + "epoch": 2.37, + "learning_rate": 2.192367782743016e-06, + "loss": 0.2461, + "step": 1926 + }, + { + "epoch": 2.38, + "learning_rate": 2.1840571099762865e-06, + "loss": 0.1491, + "step": 1927 + }, + { + "epoch": 2.38, + "learning_rate": 2.1757602874416993e-06, + "loss": 0.2768, + "step": 1928 + }, + { + "epoch": 2.38, + "learning_rate": 2.167477329841633e-06, + "loss": 0.2358, + "step": 1929 + }, + { + "epoch": 2.38, + "learning_rate": 2.1592082518538926e-06, + "loss": 0.252, + "step": 1930 + }, + { + "epoch": 2.38, + "learning_rate": 2.150953068131686e-06, + "loss": 0.229, + "step": 1931 + }, + { + "epoch": 2.38, + "learning_rate": 2.142711793303599e-06, + "loss": 0.2405, + "step": 1932 + }, + { + "epoch": 2.38, + "learning_rate": 2.1344844419735757e-06, + "loss": 0.2616, + "step": 1933 + }, + { + "epoch": 2.38, + "learning_rate": 2.1262710287208833e-06, + "loss": 0.2776, + "step": 1934 + }, + { + "epoch": 2.39, + "learning_rate": 2.118071568100094e-06, + "loss": 0.2241, + "step": 1935 + }, + { + "epoch": 2.39, + "learning_rate": 2.1098860746410498e-06, + "loss": 0.1682, + "step": 1936 + }, + { + "epoch": 2.39, + "learning_rate": 2.101714562848841e-06, + "loss": 0.254, + "step": 1937 + }, + { + "epoch": 2.39, + "learning_rate": 2.0935570472037892e-06, + "loss": 0.2709, + "step": 1938 + }, + { + "epoch": 2.39, + "learning_rate": 2.0854135421614108e-06, + "loss": 0.2177, + "step": 1939 + }, + { + "epoch": 2.39, + "learning_rate": 2.0772840621523905e-06, + "loss": 0.2648, + "step": 1940 + }, + { + "epoch": 2.39, + "learning_rate": 2.069168621582567e-06, + "loss": 0.2566, + "step": 1941 + }, + { + "epoch": 2.39, + "learning_rate": 2.061067234832893e-06, + "loss": 0.2936, + "step": 1942 + }, + { + "epoch": 2.4, + "learning_rate": 2.0529799162594242e-06, + "loss": 0.2977, + "step": 1943 + }, + { + "epoch": 2.4, + "learning_rate": 2.044906680193285e-06, + "loss": 0.2156, + "step": 1944 + }, + { + "epoch": 2.4, + "learning_rate": 2.0368475409406396e-06, + "loss": 0.2732, + "step": 1945 + }, + { + "epoch": 2.4, + "learning_rate": 2.0288025127826806e-06, + "loss": 0.2869, + "step": 1946 + }, + { + "epoch": 2.4, + "learning_rate": 2.0207716099755838e-06, + "loss": 0.2369, + "step": 1947 + }, + { + "epoch": 2.4, + "learning_rate": 2.012754846750509e-06, + "loss": 0.217, + "step": 1948 + }, + { + "epoch": 2.4, + "learning_rate": 2.004752237313544e-06, + "loss": 0.216, + "step": 1949 + }, + { + "epoch": 2.4, + "learning_rate": 1.9967637958457066e-06, + "loss": 0.2754, + "step": 1950 + }, + { + "epoch": 2.41, + "learning_rate": 1.9887895365029077e-06, + "loss": 0.2766, + "step": 1951 + }, + { + "epoch": 2.41, + "learning_rate": 1.9808294734159197e-06, + "loss": 0.24, + "step": 1952 + }, + { + "epoch": 2.41, + "learning_rate": 1.972883620690366e-06, + "loss": 0.3273, + "step": 1953 + }, + { + "epoch": 2.41, + "learning_rate": 1.9649519924066797e-06, + "loss": 0.2091, + "step": 1954 + }, + { + "epoch": 2.41, + "learning_rate": 1.957034602620098e-06, + "loss": 0.1846, + "step": 1955 + }, + { + "epoch": 2.41, + "learning_rate": 1.949131465360624e-06, + "loss": 0.2212, + "step": 1956 + }, + { + "epoch": 2.41, + "learning_rate": 1.9412425946329994e-06, + "loss": 0.2188, + "step": 1957 + }, + { + "epoch": 2.41, + "learning_rate": 1.9333680044166847e-06, + "loss": 0.2731, + "step": 1958 + }, + { + "epoch": 2.42, + "learning_rate": 1.925507708665841e-06, + "loss": 0.2514, + "step": 1959 + }, + { + "epoch": 2.42, + "learning_rate": 1.9176617213092973e-06, + "loss": 0.2029, + "step": 1960 + }, + { + "epoch": 2.42, + "learning_rate": 1.9098300562505266e-06, + "loss": 0.25, + "step": 1961 + }, + { + "epoch": 2.42, + "learning_rate": 1.9020127273676204e-06, + "loss": 0.2291, + "step": 1962 + }, + { + "epoch": 2.42, + "learning_rate": 1.8942097485132626e-06, + "loss": 0.2498, + "step": 1963 + }, + { + "epoch": 2.42, + "learning_rate": 1.8864211335147165e-06, + "loss": 0.2556, + "step": 1964 + }, + { + "epoch": 2.42, + "learning_rate": 1.8786468961737902e-06, + "loss": 0.2859, + "step": 1965 + }, + { + "epoch": 2.42, + "learning_rate": 1.8708870502668075e-06, + "loss": 0.2185, + "step": 1966 + }, + { + "epoch": 2.43, + "learning_rate": 1.8631416095445965e-06, + "loss": 0.222, + "step": 1967 + }, + { + "epoch": 2.43, + "learning_rate": 1.8554105877324525e-06, + "loss": 0.252, + "step": 1968 + }, + { + "epoch": 2.43, + "learning_rate": 1.8476939985301257e-06, + "loss": 0.24, + "step": 1969 + }, + { + "epoch": 2.43, + "learning_rate": 1.8399918556117913e-06, + "loss": 0.2471, + "step": 1970 + }, + { + "epoch": 2.43, + "learning_rate": 1.8323041726260172e-06, + "loss": 0.2188, + "step": 1971 + }, + { + "epoch": 2.43, + "learning_rate": 1.824630963195756e-06, + "loss": 0.2426, + "step": 1972 + }, + { + "epoch": 2.43, + "learning_rate": 1.81697224091831e-06, + "loss": 0.1975, + "step": 1973 + }, + { + "epoch": 2.43, + "learning_rate": 1.8093280193653074e-06, + "loss": 0.2941, + "step": 1974 + }, + { + "epoch": 2.43, + "learning_rate": 1.8016983120826792e-06, + "loss": 0.2624, + "step": 1975 + }, + { + "epoch": 2.44, + "learning_rate": 1.7940831325906417e-06, + "loss": 0.2283, + "step": 1976 + }, + { + "epoch": 2.44, + "learning_rate": 1.7864824943836633e-06, + "loss": 0.2163, + "step": 1977 + }, + { + "epoch": 2.44, + "learning_rate": 1.7788964109304495e-06, + "loss": 0.2199, + "step": 1978 + }, + { + "epoch": 2.44, + "learning_rate": 1.7713248956739082e-06, + "loss": 0.2581, + "step": 1979 + }, + { + "epoch": 2.44, + "learning_rate": 1.7637679620311287e-06, + "loss": 0.2087, + "step": 1980 + }, + { + "epoch": 2.44, + "learning_rate": 1.7562256233933717e-06, + "loss": 0.3089, + "step": 1981 + }, + { + "epoch": 2.44, + "learning_rate": 1.7486978931260313e-06, + "loss": 0.2645, + "step": 1982 + }, + { + "epoch": 2.44, + "learning_rate": 1.7411847845686082e-06, + "loss": 0.2824, + "step": 1983 + }, + { + "epoch": 2.45, + "learning_rate": 1.7336863110346968e-06, + "loss": 0.2296, + "step": 1984 + }, + { + "epoch": 2.45, + "learning_rate": 1.7262024858119597e-06, + "loss": 0.2342, + "step": 1985 + }, + { + "epoch": 2.45, + "learning_rate": 1.7187333221621006e-06, + "loss": 0.2437, + "step": 1986 + }, + { + "epoch": 2.45, + "learning_rate": 1.711278833320844e-06, + "loss": 0.3142, + "step": 1987 + }, + { + "epoch": 2.45, + "learning_rate": 1.703839032497906e-06, + "loss": 0.2982, + "step": 1988 + }, + { + "epoch": 2.45, + "learning_rate": 1.6964139328769736e-06, + "loss": 0.1708, + "step": 1989 + }, + { + "epoch": 2.45, + "learning_rate": 1.6890035476156884e-06, + "loss": 0.2036, + "step": 1990 + }, + { + "epoch": 2.45, + "learning_rate": 1.6816078898456178e-06, + "loss": 0.2379, + "step": 1991 + }, + { + "epoch": 2.46, + "learning_rate": 1.6742269726722217e-06, + "loss": 0.1803, + "step": 1992 + }, + { + "epoch": 2.46, + "learning_rate": 1.6668608091748495e-06, + "loss": 0.2623, + "step": 1993 + }, + { + "epoch": 2.46, + "learning_rate": 1.6595094124067035e-06, + "loss": 0.1986, + "step": 1994 + }, + { + "epoch": 2.46, + "learning_rate": 1.6521727953948164e-06, + "loss": 0.2596, + "step": 1995 + }, + { + "epoch": 2.46, + "learning_rate": 1.6448509711400273e-06, + "loss": 0.3034, + "step": 1996 + }, + { + "epoch": 2.46, + "learning_rate": 1.637543952616969e-06, + "loss": 0.2521, + "step": 1997 + }, + { + "epoch": 2.46, + "learning_rate": 1.6302517527740358e-06, + "loss": 0.2619, + "step": 1998 + }, + { + "epoch": 2.46, + "learning_rate": 1.6229743845333635e-06, + "loss": 0.1621, + "step": 1999 + }, + { + "epoch": 2.47, + "learning_rate": 1.6157118607908006e-06, + "loss": 0.2861, + "step": 2000 + }, + { + "epoch": 2.47, + "learning_rate": 1.6084641944158918e-06, + "loss": 0.2541, + "step": 2001 + }, + { + "epoch": 2.47, + "learning_rate": 1.601231398251859e-06, + "loss": 0.226, + "step": 2002 + }, + { + "epoch": 2.47, + "learning_rate": 1.5940134851155698e-06, + "loss": 0.2685, + "step": 2003 + }, + { + "epoch": 2.47, + "learning_rate": 1.5868104677975183e-06, + "loss": 0.2974, + "step": 2004 + }, + { + "epoch": 2.47, + "learning_rate": 1.5796223590617987e-06, + "loss": 0.2989, + "step": 2005 + }, + { + "epoch": 2.47, + "learning_rate": 1.5724491716460932e-06, + "loss": 0.267, + "step": 2006 + }, + { + "epoch": 2.47, + "learning_rate": 1.5652909182616404e-06, + "loss": 0.235, + "step": 2007 + }, + { + "epoch": 2.48, + "learning_rate": 1.558147611593216e-06, + "loss": 0.2001, + "step": 2008 + }, + { + "epoch": 2.48, + "learning_rate": 1.5510192642991073e-06, + "loss": 0.2893, + "step": 2009 + }, + { + "epoch": 2.48, + "learning_rate": 1.5439058890110892e-06, + "loss": 0.2471, + "step": 2010 + }, + { + "epoch": 2.48, + "learning_rate": 1.5368074983344128e-06, + "loss": 0.2439, + "step": 2011 + }, + { + "epoch": 2.48, + "learning_rate": 1.529724104847775e-06, + "loss": 0.2386, + "step": 2012 + }, + { + "epoch": 2.48, + "learning_rate": 1.522655721103291e-06, + "loss": 0.2367, + "step": 2013 + }, + { + "epoch": 2.48, + "learning_rate": 1.5156023596264835e-06, + "loss": 0.2005, + "step": 2014 + }, + { + "epoch": 2.48, + "learning_rate": 1.5085640329162544e-06, + "loss": 0.2074, + "step": 2015 + }, + { + "epoch": 2.49, + "learning_rate": 1.5015407534448577e-06, + "loss": 0.2724, + "step": 2016 + }, + { + "epoch": 2.49, + "learning_rate": 1.494532533657893e-06, + "loss": 0.1903, + "step": 2017 + }, + { + "epoch": 2.49, + "learning_rate": 1.4875393859742626e-06, + "loss": 0.2404, + "step": 2018 + }, + { + "epoch": 2.49, + "learning_rate": 1.480561322786167e-06, + "loss": 0.266, + "step": 2019 + }, + { + "epoch": 2.49, + "learning_rate": 1.4735983564590784e-06, + "loss": 0.2215, + "step": 2020 + }, + { + "epoch": 2.49, + "learning_rate": 1.4666504993317089e-06, + "loss": 0.186, + "step": 2021 + }, + { + "epoch": 2.49, + "learning_rate": 1.4597177637159998e-06, + "loss": 0.198, + "step": 2022 + }, + { + "epoch": 2.49, + "learning_rate": 1.4528001618970966e-06, + "loss": 0.2493, + "step": 2023 + }, + { + "epoch": 2.5, + "learning_rate": 1.4458977061333301e-06, + "loss": 0.1668, + "step": 2024 + }, + { + "epoch": 2.5, + "learning_rate": 1.4390104086561886e-06, + "loss": 0.2039, + "step": 2025 + }, + { + "epoch": 2.5, + "learning_rate": 1.432138281670299e-06, + "loss": 0.1788, + "step": 2026 + }, + { + "epoch": 2.5, + "learning_rate": 1.4252813373534013e-06, + "loss": 0.2506, + "step": 2027 + }, + { + "epoch": 2.5, + "learning_rate": 1.4184395878563395e-06, + "loss": 0.2352, + "step": 2028 + }, + { + "epoch": 2.5, + "learning_rate": 1.4116130453030296e-06, + "loss": 0.2365, + "step": 2029 + }, + { + "epoch": 2.5, + "learning_rate": 1.404801721790432e-06, + "loss": 0.2634, + "step": 2030 + }, + { + "epoch": 2.5, + "learning_rate": 1.3980056293885503e-06, + "loss": 0.2073, + "step": 2031 + }, + { + "epoch": 2.51, + "learning_rate": 1.3912247801403856e-06, + "loss": 0.2326, + "step": 2032 + }, + { + "epoch": 2.51, + "learning_rate": 1.3844591860619382e-06, + "loss": 0.2217, + "step": 2033 + }, + { + "epoch": 2.51, + "learning_rate": 1.3777088591421717e-06, + "loss": 0.2594, + "step": 2034 + }, + { + "epoch": 2.51, + "learning_rate": 1.3709738113429904e-06, + "loss": 0.2254, + "step": 2035 + }, + { + "epoch": 2.51, + "learning_rate": 1.3642540545992332e-06, + "loss": 0.2858, + "step": 2036 + }, + { + "epoch": 2.51, + "learning_rate": 1.3575496008186307e-06, + "loss": 0.2264, + "step": 2037 + }, + { + "epoch": 2.51, + "learning_rate": 1.3508604618818067e-06, + "loss": 0.2521, + "step": 2038 + }, + { + "epoch": 2.51, + "learning_rate": 1.3441866496422385e-06, + "loss": 0.2468, + "step": 2039 + }, + { + "epoch": 2.52, + "learning_rate": 1.3375281759262493e-06, + "loss": 0.2193, + "step": 2040 + }, + { + "epoch": 2.52, + "learning_rate": 1.330885052532981e-06, + "loss": 0.2577, + "step": 2041 + }, + { + "epoch": 2.52, + "learning_rate": 1.3242572912343665e-06, + "loss": 0.2577, + "step": 2042 + }, + { + "epoch": 2.52, + "learning_rate": 1.3176449037751294e-06, + "loss": 0.2481, + "step": 2043 + }, + { + "epoch": 2.52, + "learning_rate": 1.3110479018727373e-06, + "loss": 0.1974, + "step": 2044 + }, + { + "epoch": 2.52, + "learning_rate": 1.3044662972174005e-06, + "loss": 0.2659, + "step": 2045 + }, + { + "epoch": 2.52, + "learning_rate": 1.2979001014720472e-06, + "loss": 0.2755, + "step": 2046 + }, + { + "epoch": 2.52, + "learning_rate": 1.2913493262722942e-06, + "loss": 0.2274, + "step": 2047 + }, + { + "epoch": 2.52, + "learning_rate": 1.2848139832264328e-06, + "loss": 0.2358, + "step": 2048 + }, + { + "epoch": 2.53, + "learning_rate": 1.2782940839154113e-06, + "loss": 0.2672, + "step": 2049 + }, + { + "epoch": 2.53, + "learning_rate": 1.2717896398928088e-06, + "loss": 0.3015, + "step": 2050 + }, + { + "epoch": 2.53, + "learning_rate": 1.2653006626848207e-06, + "loss": 0.2115, + "step": 2051 + }, + { + "epoch": 2.53, + "learning_rate": 1.2588271637902293e-06, + "loss": 0.2509, + "step": 2052 + }, + { + "epoch": 2.53, + "learning_rate": 1.2523691546803872e-06, + "loss": 0.2017, + "step": 2053 + }, + { + "epoch": 2.53, + "learning_rate": 1.245926646799205e-06, + "loss": 0.3118, + "step": 2054 + }, + { + "epoch": 2.53, + "learning_rate": 1.239499651563123e-06, + "loss": 0.2645, + "step": 2055 + }, + { + "epoch": 2.53, + "learning_rate": 1.233088180361085e-06, + "loss": 0.2518, + "step": 2056 + }, + { + "epoch": 2.54, + "learning_rate": 1.2266922445545348e-06, + "loss": 0.2335, + "step": 2057 + }, + { + "epoch": 2.54, + "learning_rate": 1.2203118554773807e-06, + "loss": 0.3091, + "step": 2058 + }, + { + "epoch": 2.54, + "learning_rate": 1.2139470244359853e-06, + "loss": 0.271, + "step": 2059 + }, + { + "epoch": 2.54, + "learning_rate": 1.2075977627091373e-06, + "loss": 0.2185, + "step": 2060 + }, + { + "epoch": 2.54, + "learning_rate": 1.201264081548038e-06, + "loss": 0.2242, + "step": 2061 + }, + { + "epoch": 2.54, + "learning_rate": 1.194945992176285e-06, + "loss": 0.2025, + "step": 2062 + }, + { + "epoch": 2.54, + "learning_rate": 1.1886435057898338e-06, + "loss": 0.2404, + "step": 2063 + }, + { + "epoch": 2.54, + "learning_rate": 1.1823566335570036e-06, + "loss": 0.1756, + "step": 2064 + }, + { + "epoch": 2.55, + "learning_rate": 1.176085386618434e-06, + "loss": 0.2871, + "step": 2065 + }, + { + "epoch": 2.55, + "learning_rate": 1.1698297760870824e-06, + "loss": 0.2438, + "step": 2066 + }, + { + "epoch": 2.55, + "learning_rate": 1.1635898130481983e-06, + "loss": 0.348, + "step": 2067 + }, + { + "epoch": 2.55, + "learning_rate": 1.1573655085592983e-06, + "loss": 0.2508, + "step": 2068 + }, + { + "epoch": 2.55, + "learning_rate": 1.151156873650151e-06, + "loss": 0.1951, + "step": 2069 + }, + { + "epoch": 2.55, + "learning_rate": 1.1449639193227646e-06, + "loss": 0.3475, + "step": 2070 + }, + { + "epoch": 2.55, + "learning_rate": 1.138786656551354e-06, + "loss": 0.2809, + "step": 2071 + }, + { + "epoch": 2.55, + "learning_rate": 1.1326250962823338e-06, + "loss": 0.2241, + "step": 2072 + }, + { + "epoch": 2.56, + "learning_rate": 1.1264792494342858e-06, + "loss": 0.2003, + "step": 2073 + }, + { + "epoch": 2.56, + "learning_rate": 1.1203491268979504e-06, + "loss": 0.2514, + "step": 2074 + }, + { + "epoch": 2.56, + "learning_rate": 1.114234739536204e-06, + "loss": 0.196, + "step": 2075 + }, + { + "epoch": 2.56, + "learning_rate": 1.1081360981840428e-06, + "loss": 0.2682, + "step": 2076 + }, + { + "epoch": 2.56, + "learning_rate": 1.1020532136485517e-06, + "loss": 0.2454, + "step": 2077 + }, + { + "epoch": 2.56, + "learning_rate": 1.0959860967089042e-06, + "loss": 0.2635, + "step": 2078 + }, + { + "epoch": 2.56, + "learning_rate": 1.0899347581163222e-06, + "loss": 0.2678, + "step": 2079 + }, + { + "epoch": 2.56, + "learning_rate": 1.0838992085940748e-06, + "loss": 0.2553, + "step": 2080 + }, + { + "epoch": 2.57, + "learning_rate": 1.0778794588374542e-06, + "loss": 0.2494, + "step": 2081 + }, + { + "epoch": 2.57, + "learning_rate": 1.0718755195137442e-06, + "loss": 0.2119, + "step": 2082 + }, + { + "epoch": 2.57, + "learning_rate": 1.0658874012622244e-06, + "loss": 0.2503, + "step": 2083 + }, + { + "epoch": 2.57, + "learning_rate": 1.0599151146941268e-06, + "loss": 0.242, + "step": 2084 + }, + { + "epoch": 2.57, + "learning_rate": 1.0539586703926396e-06, + "loss": 0.2633, + "step": 2085 + }, + { + "epoch": 2.57, + "learning_rate": 1.0480180789128691e-06, + "loss": 0.3036, + "step": 2086 + }, + { + "epoch": 2.57, + "learning_rate": 1.0420933507818332e-06, + "loss": 0.2964, + "step": 2087 + }, + { + "epoch": 2.57, + "learning_rate": 1.0361844964984435e-06, + "loss": 0.2758, + "step": 2088 + }, + { + "epoch": 2.58, + "learning_rate": 1.0302915265334722e-06, + "loss": 0.2671, + "step": 2089 + }, + { + "epoch": 2.58, + "learning_rate": 1.024414451329555e-06, + "loss": 0.2473, + "step": 2090 + }, + { + "epoch": 2.58, + "learning_rate": 1.0185532813011523e-06, + "loss": 0.2337, + "step": 2091 + }, + { + "epoch": 2.58, + "learning_rate": 1.0127080268345434e-06, + "loss": 0.1679, + "step": 2092 + }, + { + "epoch": 2.58, + "learning_rate": 1.0068786982878087e-06, + "loss": 0.1924, + "step": 2093 + }, + { + "epoch": 2.58, + "learning_rate": 1.0010653059907982e-06, + "loss": 0.2145, + "step": 2094 + }, + { + "epoch": 2.58, + "learning_rate": 9.952678602451272e-07, + "loss": 0.2173, + "step": 2095 + }, + { + "epoch": 2.58, + "learning_rate": 9.894863713241532e-07, + "loss": 0.2373, + "step": 2096 + }, + { + "epoch": 2.59, + "learning_rate": 9.837208494729567e-07, + "loss": 0.2922, + "step": 2097 + }, + { + "epoch": 2.59, + "learning_rate": 9.77971304908325e-07, + "loss": 0.278, + "step": 2098 + }, + { + "epoch": 2.59, + "learning_rate": 9.722377478187317e-07, + "loss": 0.2084, + "step": 2099 + }, + { + "epoch": 2.59, + "learning_rate": 9.66520188364316e-07, + "loss": 0.2269, + "step": 2100 + }, + { + "epoch": 2.59, + "learning_rate": 9.608186366768746e-07, + "loss": 0.2494, + "step": 2101 + }, + { + "epoch": 2.59, + "learning_rate": 9.551331028598365e-07, + "loss": 0.2525, + "step": 2102 + }, + { + "epoch": 2.59, + "learning_rate": 9.494635969882426e-07, + "loss": 0.3632, + "step": 2103 + }, + { + "epoch": 2.59, + "learning_rate": 9.438101291087364e-07, + "loss": 0.291, + "step": 2104 + }, + { + "epoch": 2.6, + "learning_rate": 9.381727092395365e-07, + "loss": 0.225, + "step": 2105 + }, + { + "epoch": 2.6, + "learning_rate": 9.325513473704273e-07, + "loss": 0.2551, + "step": 2106 + }, + { + "epoch": 2.6, + "learning_rate": 9.269460534627372e-07, + "loss": 0.2597, + "step": 2107 + }, + { + "epoch": 2.6, + "learning_rate": 9.213568374493176e-07, + "loss": 0.2773, + "step": 2108 + }, + { + "epoch": 2.6, + "learning_rate": 9.157837092345334e-07, + "loss": 0.2498, + "step": 2109 + }, + { + "epoch": 2.6, + "learning_rate": 9.102266786942426e-07, + "loss": 0.2011, + "step": 2110 + }, + { + "epoch": 2.6, + "learning_rate": 9.046857556757704e-07, + "loss": 0.2137, + "step": 2111 + }, + { + "epoch": 2.6, + "learning_rate": 8.991609499979037e-07, + "loss": 0.1704, + "step": 2112 + }, + { + "epoch": 2.6, + "learning_rate": 8.936522714508678e-07, + "loss": 0.203, + "step": 2113 + }, + { + "epoch": 2.61, + "learning_rate": 8.881597297963107e-07, + "loss": 0.234, + "step": 2114 + }, + { + "epoch": 2.61, + "learning_rate": 8.82683334767287e-07, + "loss": 0.1806, + "step": 2115 + }, + { + "epoch": 2.61, + "learning_rate": 8.772230960682282e-07, + "loss": 0.214, + "step": 2116 + }, + { + "epoch": 2.61, + "learning_rate": 8.71779023374949e-07, + "loss": 0.2731, + "step": 2117 + }, + { + "epoch": 2.61, + "learning_rate": 8.663511263346114e-07, + "loss": 0.2804, + "step": 2118 + }, + { + "epoch": 2.61, + "learning_rate": 8.609394145657146e-07, + "loss": 0.2296, + "step": 2119 + }, + { + "epoch": 2.61, + "learning_rate": 8.555438976580743e-07, + "loss": 0.2954, + "step": 2120 + }, + { + "epoch": 2.61, + "learning_rate": 8.501645851728091e-07, + "loss": 0.2174, + "step": 2121 + }, + { + "epoch": 2.62, + "learning_rate": 8.448014866423238e-07, + "loss": 0.276, + "step": 2122 + }, + { + "epoch": 2.62, + "learning_rate": 8.394546115702928e-07, + "loss": 0.2582, + "step": 2123 + }, + { + "epoch": 2.62, + "learning_rate": 8.34123969431635e-07, + "loss": 0.1809, + "step": 2124 + }, + { + "epoch": 2.62, + "learning_rate": 8.28809569672514e-07, + "loss": 0.2305, + "step": 2125 + }, + { + "epoch": 2.62, + "learning_rate": 8.235114217103012e-07, + "loss": 0.2551, + "step": 2126 + }, + { + "epoch": 2.62, + "learning_rate": 8.182295349335734e-07, + "loss": 0.2188, + "step": 2127 + }, + { + "epoch": 2.62, + "learning_rate": 8.129639187020954e-07, + "loss": 0.1986, + "step": 2128 + }, + { + "epoch": 2.62, + "learning_rate": 8.077145823467924e-07, + "loss": 0.2755, + "step": 2129 + }, + { + "epoch": 2.63, + "learning_rate": 8.024815351697457e-07, + "loss": 0.292, + "step": 2130 + }, + { + "epoch": 2.63, + "learning_rate": 7.972647864441718e-07, + "loss": 0.2003, + "step": 2131 + }, + { + "epoch": 2.63, + "learning_rate": 7.920643454144017e-07, + "loss": 0.3067, + "step": 2132 + }, + { + "epoch": 2.63, + "learning_rate": 7.868802212958704e-07, + "loss": 0.2304, + "step": 2133 + }, + { + "epoch": 2.63, + "learning_rate": 7.817124232751006e-07, + "loss": 0.3047, + "step": 2134 + }, + { + "epoch": 2.63, + "learning_rate": 7.765609605096802e-07, + "loss": 0.2629, + "step": 2135 + }, + { + "epoch": 2.63, + "learning_rate": 7.714258421282572e-07, + "loss": 0.2724, + "step": 2136 + }, + { + "epoch": 2.63, + "learning_rate": 7.663070772305081e-07, + "loss": 0.2988, + "step": 2137 + }, + { + "epoch": 2.64, + "learning_rate": 7.612046748871327e-07, + "loss": 0.274, + "step": 2138 + }, + { + "epoch": 2.64, + "learning_rate": 7.561186441398393e-07, + "loss": 0.1997, + "step": 2139 + }, + { + "epoch": 2.64, + "learning_rate": 7.510489940013244e-07, + "loss": 0.2337, + "step": 2140 + }, + { + "epoch": 2.64, + "learning_rate": 7.459957334552526e-07, + "loss": 0.2126, + "step": 2141 + }, + { + "epoch": 2.64, + "learning_rate": 7.409588714562477e-07, + "loss": 0.3267, + "step": 2142 + }, + { + "epoch": 2.64, + "learning_rate": 7.359384169298744e-07, + "loss": 0.2654, + "step": 2143 + }, + { + "epoch": 2.64, + "learning_rate": 7.309343787726264e-07, + "loss": 0.3001, + "step": 2144 + }, + { + "epoch": 2.64, + "learning_rate": 7.259467658519026e-07, + "loss": 0.2441, + "step": 2145 + }, + { + "epoch": 2.65, + "learning_rate": 7.209755870059953e-07, + "loss": 0.2682, + "step": 2146 + }, + { + "epoch": 2.65, + "learning_rate": 7.160208510440747e-07, + "loss": 0.2441, + "step": 2147 + }, + { + "epoch": 2.65, + "learning_rate": 7.110825667461762e-07, + "loss": 0.2707, + "step": 2148 + }, + { + "epoch": 2.65, + "learning_rate": 7.061607428631823e-07, + "loss": 0.2631, + "step": 2149 + }, + { + "epoch": 2.65, + "learning_rate": 7.012553881168016e-07, + "loss": 0.2033, + "step": 2150 + }, + { + "epoch": 2.65, + "learning_rate": 6.963665111995633e-07, + "loss": 0.2261, + "step": 2151 + }, + { + "epoch": 2.65, + "learning_rate": 6.914941207747972e-07, + "loss": 0.2395, + "step": 2152 + }, + { + "epoch": 2.65, + "learning_rate": 6.866382254766158e-07, + "loss": 0.2414, + "step": 2153 + }, + { + "epoch": 2.66, + "learning_rate": 6.817988339099035e-07, + "loss": 0.226, + "step": 2154 + }, + { + "epoch": 2.66, + "learning_rate": 6.769759546502952e-07, + "loss": 0.2646, + "step": 2155 + }, + { + "epoch": 2.66, + "learning_rate": 6.72169596244171e-07, + "loss": 0.2601, + "step": 2156 + }, + { + "epoch": 2.66, + "learning_rate": 6.673797672086335e-07, + "loss": 0.2303, + "step": 2157 + }, + { + "epoch": 2.66, + "learning_rate": 6.62606476031491e-07, + "loss": 0.2032, + "step": 2158 + }, + { + "epoch": 2.66, + "learning_rate": 6.578497311712484e-07, + "loss": 0.2647, + "step": 2159 + }, + { + "epoch": 2.66, + "learning_rate": 6.531095410570898e-07, + "loss": 0.2645, + "step": 2160 + }, + { + "epoch": 2.66, + "learning_rate": 6.483859140888648e-07, + "loss": 0.1766, + "step": 2161 + }, + { + "epoch": 2.67, + "learning_rate": 6.436788586370724e-07, + "loss": 0.206, + "step": 2162 + }, + { + "epoch": 2.67, + "learning_rate": 6.3898838304284e-07, + "loss": 0.3012, + "step": 2163 + }, + { + "epoch": 2.67, + "learning_rate": 6.343144956179203e-07, + "loss": 0.2631, + "step": 2164 + }, + { + "epoch": 2.67, + "learning_rate": 6.296572046446725e-07, + "loss": 0.3241, + "step": 2165 + }, + { + "epoch": 2.67, + "learning_rate": 6.250165183760426e-07, + "loss": 0.2626, + "step": 2166 + }, + { + "epoch": 2.67, + "learning_rate": 6.203924450355514e-07, + "loss": 0.1967, + "step": 2167 + }, + { + "epoch": 2.67, + "learning_rate": 6.157849928172832e-07, + "loss": 0.2223, + "step": 2168 + }, + { + "epoch": 2.67, + "learning_rate": 6.111941698858681e-07, + "loss": 0.2853, + "step": 2169 + }, + { + "epoch": 2.68, + "learning_rate": 6.066199843764697e-07, + "loss": 0.2684, + "step": 2170 + }, + { + "epoch": 2.68, + "learning_rate": 6.020624443947664e-07, + "loss": 0.2473, + "step": 2171 + }, + { + "epoch": 2.68, + "learning_rate": 5.975215580169402e-07, + "loss": 0.371, + "step": 2172 + }, + { + "epoch": 2.68, + "learning_rate": 5.929973332896677e-07, + "loss": 0.1879, + "step": 2173 + }, + { + "epoch": 2.68, + "learning_rate": 5.884897782300914e-07, + "loss": 0.269, + "step": 2174 + }, + { + "epoch": 2.68, + "learning_rate": 5.839989008258217e-07, + "loss": 0.227, + "step": 2175 + }, + { + "epoch": 2.68, + "learning_rate": 5.795247090349099e-07, + "loss": 0.2617, + "step": 2176 + }, + { + "epoch": 2.68, + "learning_rate": 5.750672107858435e-07, + "loss": 0.2608, + "step": 2177 + }, + { + "epoch": 2.69, + "learning_rate": 5.706264139775286e-07, + "loss": 0.2457, + "step": 2178 + }, + { + "epoch": 2.69, + "learning_rate": 5.662023264792715e-07, + "loss": 0.2025, + "step": 2179 + }, + { + "epoch": 2.69, + "learning_rate": 5.617949561307701e-07, + "loss": 0.2343, + "step": 2180 + }, + { + "epoch": 2.69, + "learning_rate": 5.574043107421023e-07, + "loss": 0.2582, + "step": 2181 + }, + { + "epoch": 2.69, + "learning_rate": 5.530303980937046e-07, + "loss": 0.259, + "step": 2182 + }, + { + "epoch": 2.69, + "learning_rate": 5.486732259363647e-07, + "loss": 0.2183, + "step": 2183 + }, + { + "epoch": 2.69, + "learning_rate": 5.443328019912042e-07, + "loss": 0.2274, + "step": 2184 + }, + { + "epoch": 2.69, + "learning_rate": 5.400091339496638e-07, + "loss": 0.2688, + "step": 2185 + }, + { + "epoch": 2.69, + "learning_rate": 5.357022294734959e-07, + "loss": 0.1865, + "step": 2186 + }, + { + "epoch": 2.7, + "learning_rate": 5.314120961947467e-07, + "loss": 0.254, + "step": 2187 + }, + { + "epoch": 2.7, + "learning_rate": 5.271387417157392e-07, + "loss": 0.1648, + "step": 2188 + }, + { + "epoch": 2.7, + "learning_rate": 5.228821736090684e-07, + "loss": 0.215, + "step": 2189 + }, + { + "epoch": 2.7, + "learning_rate": 5.186423994175771e-07, + "loss": 0.2337, + "step": 2190 + }, + { + "epoch": 2.7, + "learning_rate": 5.144194266543557e-07, + "loss": 0.2859, + "step": 2191 + }, + { + "epoch": 2.7, + "learning_rate": 5.102132628027168e-07, + "loss": 0.2417, + "step": 2192 + }, + { + "epoch": 2.7, + "learning_rate": 5.060239153161872e-07, + "loss": 0.2095, + "step": 2193 + }, + { + "epoch": 2.7, + "learning_rate": 5.018513916184963e-07, + "loss": 0.2244, + "step": 2194 + }, + { + "epoch": 2.71, + "learning_rate": 4.976956991035587e-07, + "loss": 0.2451, + "step": 2195 + }, + { + "epoch": 2.71, + "learning_rate": 4.935568451354645e-07, + "loss": 0.1786, + "step": 2196 + }, + { + "epoch": 2.71, + "learning_rate": 4.894348370484648e-07, + "loss": 0.2102, + "step": 2197 + }, + { + "epoch": 2.71, + "learning_rate": 4.853296821469589e-07, + "loss": 0.2391, + "step": 2198 + }, + { + "epoch": 2.71, + "learning_rate": 4.812413877054833e-07, + "loss": 0.2503, + "step": 2199 + }, + { + "epoch": 2.71, + "learning_rate": 4.771699609686919e-07, + "loss": 0.2435, + "step": 2200 + }, + { + "epoch": 2.71, + "learning_rate": 4.731154091513546e-07, + "loss": 0.2387, + "step": 2201 + }, + { + "epoch": 2.71, + "learning_rate": 4.690777394383339e-07, + "loss": 0.2238, + "step": 2202 + }, + { + "epoch": 2.72, + "learning_rate": 4.650569589845766e-07, + "loss": 0.241, + "step": 2203 + }, + { + "epoch": 2.72, + "learning_rate": 4.610530749151032e-07, + "loss": 0.233, + "step": 2204 + }, + { + "epoch": 2.72, + "learning_rate": 4.570660943249927e-07, + "loss": 0.2031, + "step": 2205 + }, + { + "epoch": 2.72, + "learning_rate": 4.5309602427936584e-07, + "loss": 0.2503, + "step": 2206 + }, + { + "epoch": 2.72, + "learning_rate": 4.491428718133817e-07, + "loss": 0.2533, + "step": 2207 + }, + { + "epoch": 2.72, + "learning_rate": 4.4520664393222e-07, + "loss": 0.2275, + "step": 2208 + }, + { + "epoch": 2.72, + "learning_rate": 4.412873476110702e-07, + "loss": 0.2279, + "step": 2209 + }, + { + "epoch": 2.72, + "learning_rate": 4.3738498979511545e-07, + "loss": 0.2656, + "step": 2210 + }, + { + "epoch": 2.73, + "learning_rate": 4.334995773995221e-07, + "loss": 0.2178, + "step": 2211 + }, + { + "epoch": 2.73, + "learning_rate": 4.296311173094314e-07, + "loss": 0.2437, + "step": 2212 + }, + { + "epoch": 2.73, + "learning_rate": 4.2577961637994544e-07, + "loss": 0.1933, + "step": 2213 + }, + { + "epoch": 2.73, + "learning_rate": 4.2194508143610925e-07, + "loss": 0.2381, + "step": 2214 + }, + { + "epoch": 2.73, + "learning_rate": 4.181275192729084e-07, + "loss": 0.2387, + "step": 2215 + }, + { + "epoch": 2.73, + "learning_rate": 4.1432693665524715e-07, + "loss": 0.1865, + "step": 2216 + }, + { + "epoch": 2.73, + "learning_rate": 4.1054334031794373e-07, + "loss": 0.2403, + "step": 2217 + }, + { + "epoch": 2.73, + "learning_rate": 4.067767369657161e-07, + "loss": 0.2528, + "step": 2218 + }, + { + "epoch": 2.74, + "learning_rate": 4.0302713327316834e-07, + "loss": 0.24, + "step": 2219 + }, + { + "epoch": 2.74, + "learning_rate": 3.992945358847833e-07, + "loss": 0.2574, + "step": 2220 + }, + { + "epoch": 2.74, + "learning_rate": 3.955789514149022e-07, + "loss": 0.2809, + "step": 2221 + }, + { + "epoch": 2.74, + "learning_rate": 3.9188038644772495e-07, + "loss": 0.3305, + "step": 2222 + }, + { + "epoch": 2.74, + "learning_rate": 3.8819884753728665e-07, + "loss": 0.2554, + "step": 2223 + }, + { + "epoch": 2.74, + "learning_rate": 3.8453434120745535e-07, + "loss": 0.2926, + "step": 2224 + }, + { + "epoch": 2.74, + "learning_rate": 3.808868739519167e-07, + "loss": 0.2142, + "step": 2225 + }, + { + "epoch": 2.74, + "learning_rate": 3.772564522341582e-07, + "loss": 0.2036, + "step": 2226 + }, + { + "epoch": 2.75, + "learning_rate": 3.736430824874637e-07, + "loss": 0.2249, + "step": 2227 + }, + { + "epoch": 2.75, + "learning_rate": 3.700467711149025e-07, + "loss": 0.1955, + "step": 2228 + }, + { + "epoch": 2.75, + "learning_rate": 3.6646752448931345e-07, + "loss": 0.2294, + "step": 2229 + }, + { + "epoch": 2.75, + "learning_rate": 3.629053489532963e-07, + "loss": 0.1743, + "step": 2230 + }, + { + "epoch": 2.75, + "learning_rate": 3.5936025081919957e-07, + "loss": 0.2289, + "step": 2231 + }, + { + "epoch": 2.75, + "learning_rate": 3.5583223636911027e-07, + "loss": 0.2036, + "step": 2232 + }, + { + "epoch": 2.75, + "learning_rate": 3.5232131185484075e-07, + "loss": 0.2003, + "step": 2233 + }, + { + "epoch": 2.75, + "learning_rate": 3.488274834979233e-07, + "loss": 0.2985, + "step": 2234 + }, + { + "epoch": 2.76, + "learning_rate": 3.453507574895898e-07, + "loss": 0.253, + "step": 2235 + }, + { + "epoch": 2.76, + "learning_rate": 3.4189113999076983e-07, + "loss": 0.2217, + "step": 2236 + }, + { + "epoch": 2.76, + "learning_rate": 3.3844863713207276e-07, + "loss": 0.2433, + "step": 2237 + }, + { + "epoch": 2.76, + "learning_rate": 3.350232550137833e-07, + "loss": 0.2084, + "step": 2238 + }, + { + "epoch": 2.76, + "learning_rate": 3.3161499970584597e-07, + "loss": 0.315, + "step": 2239 + }, + { + "epoch": 2.76, + "learning_rate": 3.282238772478541e-07, + "loss": 0.1964, + "step": 2240 + }, + { + "epoch": 2.76, + "learning_rate": 3.2484989364904295e-07, + "loss": 0.2923, + "step": 2241 + }, + { + "epoch": 2.76, + "learning_rate": 3.2149305488827553e-07, + "loss": 0.1943, + "step": 2242 + }, + { + "epoch": 2.77, + "learning_rate": 3.1815336691403464e-07, + "loss": 0.3159, + "step": 2243 + }, + { + "epoch": 2.77, + "learning_rate": 3.148308356444085e-07, + "loss": 0.2466, + "step": 2244 + }, + { + "epoch": 2.77, + "learning_rate": 3.115254669670864e-07, + "loss": 0.2346, + "step": 2245 + }, + { + "epoch": 2.77, + "learning_rate": 3.082372667393441e-07, + "loss": 0.2641, + "step": 2246 + }, + { + "epoch": 2.77, + "learning_rate": 3.049662407880294e-07, + "loss": 0.2195, + "step": 2247 + }, + { + "epoch": 2.77, + "learning_rate": 3.0171239490956237e-07, + "loss": 0.2371, + "step": 2248 + }, + { + "epoch": 2.77, + "learning_rate": 2.984757348699152e-07, + "loss": 0.2278, + "step": 2249 + }, + { + "epoch": 2.77, + "learning_rate": 2.952562664046088e-07, + "loss": 0.2492, + "step": 2250 + }, + { + "epoch": 2.77, + "learning_rate": 2.9205399521869847e-07, + "loss": 0.2457, + "step": 2251 + }, + { + "epoch": 2.78, + "learning_rate": 2.8886892698676394e-07, + "loss": 0.2315, + "step": 2252 + }, + { + "epoch": 2.78, + "learning_rate": 2.857010673529015e-07, + "loss": 0.2349, + "step": 2253 + }, + { + "epoch": 2.78, + "learning_rate": 2.825504219307118e-07, + "loss": 0.2202, + "step": 2254 + }, + { + "epoch": 2.78, + "learning_rate": 2.7941699630329556e-07, + "loss": 0.2405, + "step": 2255 + }, + { + "epoch": 2.78, + "learning_rate": 2.7630079602323447e-07, + "loss": 0.2697, + "step": 2256 + }, + { + "epoch": 2.78, + "learning_rate": 2.7320182661258687e-07, + "loss": 0.2286, + "step": 2257 + }, + { + "epoch": 2.78, + "learning_rate": 2.701200935628767e-07, + "loss": 0.2114, + "step": 2258 + }, + { + "epoch": 2.78, + "learning_rate": 2.6705560233508787e-07, + "loss": 0.1924, + "step": 2259 + }, + { + "epoch": 2.79, + "learning_rate": 2.6400835835964645e-07, + "loss": 0.1452, + "step": 2260 + }, + { + "epoch": 2.79, + "learning_rate": 2.6097836703641856e-07, + "loss": 0.1814, + "step": 2261 + }, + { + "epoch": 2.79, + "learning_rate": 2.5796563373469585e-07, + "loss": 0.2495, + "step": 2262 + }, + { + "epoch": 2.79, + "learning_rate": 2.5497016379318894e-07, + "loss": 0.2362, + "step": 2263 + }, + { + "epoch": 2.79, + "learning_rate": 2.5199196252001623e-07, + "loss": 0.2737, + "step": 2264 + }, + { + "epoch": 2.79, + "learning_rate": 2.4903103519269724e-07, + "loss": 0.3056, + "step": 2265 + }, + { + "epoch": 2.79, + "learning_rate": 2.4608738705813706e-07, + "loss": 0.2802, + "step": 2266 + }, + { + "epoch": 2.79, + "learning_rate": 2.4316102333262647e-07, + "loss": 0.2011, + "step": 2267 + }, + { + "epoch": 2.8, + "learning_rate": 2.4025194920182405e-07, + "loss": 0.2253, + "step": 2268 + }, + { + "epoch": 2.8, + "learning_rate": 2.3736016982075172e-07, + "loss": 0.2237, + "step": 2269 + }, + { + "epoch": 2.8, + "learning_rate": 2.3448569031378043e-07, + "loss": 0.2563, + "step": 2270 + }, + { + "epoch": 2.8, + "learning_rate": 2.316285157746312e-07, + "loss": 0.2887, + "step": 2271 + }, + { + "epoch": 2.8, + "learning_rate": 2.2878865126635618e-07, + "loss": 0.1987, + "step": 2272 + }, + { + "epoch": 2.8, + "learning_rate": 2.2596610182133328e-07, + "loss": 0.2352, + "step": 2273 + }, + { + "epoch": 2.8, + "learning_rate": 2.2316087244125928e-07, + "loss": 0.2707, + "step": 2274 + }, + { + "epoch": 2.8, + "learning_rate": 2.2037296809713448e-07, + "loss": 0.2557, + "step": 2275 + }, + { + "epoch": 2.81, + "learning_rate": 2.1760239372926372e-07, + "loss": 0.2394, + "step": 2276 + }, + { + "epoch": 2.81, + "learning_rate": 2.1484915424723973e-07, + "loss": 0.2192, + "step": 2277 + }, + { + "epoch": 2.81, + "learning_rate": 2.121132545299376e-07, + "loss": 0.191, + "step": 2278 + }, + { + "epoch": 2.81, + "learning_rate": 2.093946994255036e-07, + "loss": 0.2468, + "step": 2279 + }, + { + "epoch": 2.81, + "learning_rate": 2.0669349375135094e-07, + "loss": 0.2919, + "step": 2280 + }, + { + "epoch": 2.81, + "learning_rate": 2.0400964229414732e-07, + "loss": 0.2645, + "step": 2281 + }, + { + "epoch": 2.81, + "learning_rate": 2.0134314980980952e-07, + "loss": 0.2294, + "step": 2282 + }, + { + "epoch": 2.81, + "learning_rate": 1.986940210234922e-07, + "loss": 0.2362, + "step": 2283 + }, + { + "epoch": 2.82, + "learning_rate": 1.9606226062957922e-07, + "loss": 0.2462, + "step": 2284 + }, + { + "epoch": 2.82, + "learning_rate": 1.9344787329168002e-07, + "loss": 0.2253, + "step": 2285 + }, + { + "epoch": 2.82, + "learning_rate": 1.908508636426176e-07, + "loss": 0.2706, + "step": 2286 + }, + { + "epoch": 2.82, + "learning_rate": 1.8827123628441634e-07, + "loss": 0.2477, + "step": 2287 + }, + { + "epoch": 2.82, + "learning_rate": 1.8570899578830293e-07, + "loss": 0.1967, + "step": 2288 + }, + { + "epoch": 2.82, + "learning_rate": 1.831641466946954e-07, + "loss": 0.2665, + "step": 2289 + }, + { + "epoch": 2.82, + "learning_rate": 1.8063669351318757e-07, + "loss": 0.2725, + "step": 2290 + }, + { + "epoch": 2.82, + "learning_rate": 1.7812664072255014e-07, + "loss": 0.1859, + "step": 2291 + }, + { + "epoch": 2.83, + "learning_rate": 1.756339927707196e-07, + "loss": 0.2347, + "step": 2292 + }, + { + "epoch": 2.83, + "learning_rate": 1.731587540747903e-07, + "loss": 0.2401, + "step": 2293 + }, + { + "epoch": 2.83, + "learning_rate": 1.70700929021006e-07, + "loss": 0.2434, + "step": 2294 + }, + { + "epoch": 2.83, + "learning_rate": 1.682605219647515e-07, + "loss": 0.2799, + "step": 2295 + }, + { + "epoch": 2.83, + "learning_rate": 1.658375372305465e-07, + "loss": 0.2425, + "step": 2296 + }, + { + "epoch": 2.83, + "learning_rate": 1.6343197911203978e-07, + "loss": 0.2486, + "step": 2297 + }, + { + "epoch": 2.83, + "learning_rate": 1.6104385187199812e-07, + "loss": 0.1662, + "step": 2298 + }, + { + "epoch": 2.83, + "learning_rate": 1.5867315974229968e-07, + "loss": 0.2624, + "step": 2299 + }, + { + "epoch": 2.84, + "learning_rate": 1.5631990692392296e-07, + "loss": 0.1657, + "step": 2300 + }, + { + "epoch": 2.84, + "learning_rate": 1.5398409758695e-07, + "loss": 0.2167, + "step": 2301 + }, + { + "epoch": 2.84, + "learning_rate": 1.5166573587054867e-07, + "loss": 0.2774, + "step": 2302 + }, + { + "epoch": 2.84, + "learning_rate": 1.4936482588296942e-07, + "loss": 0.2339, + "step": 2303 + }, + { + "epoch": 2.84, + "learning_rate": 1.4708137170153626e-07, + "loss": 0.2423, + "step": 2304 + }, + { + "epoch": 2.84, + "learning_rate": 1.448153773726402e-07, + "loss": 0.2248, + "step": 2305 + }, + { + "epoch": 2.84, + "learning_rate": 1.4256684691173584e-07, + "loss": 0.2958, + "step": 2306 + }, + { + "epoch": 2.84, + "learning_rate": 1.4033578430332706e-07, + "loss": 0.1968, + "step": 2307 + }, + { + "epoch": 2.85, + "learning_rate": 1.381221935009669e-07, + "loss": 0.2272, + "step": 2308 + }, + { + "epoch": 2.85, + "learning_rate": 1.3592607842724648e-07, + "loss": 0.2463, + "step": 2309 + }, + { + "epoch": 2.85, + "learning_rate": 1.3374744297378839e-07, + "loss": 0.2404, + "step": 2310 + }, + { + "epoch": 2.85, + "learning_rate": 1.3158629100124e-07, + "loss": 0.2423, + "step": 2311 + }, + { + "epoch": 2.85, + "learning_rate": 1.2944262633927007e-07, + "loss": 0.2341, + "step": 2312 + }, + { + "epoch": 2.85, + "learning_rate": 1.2731645278655448e-07, + "loss": 0.2754, + "step": 2313 + }, + { + "epoch": 2.85, + "learning_rate": 1.2520777411077822e-07, + "loss": 0.2293, + "step": 2314 + }, + { + "epoch": 2.85, + "learning_rate": 1.231165940486234e-07, + "loss": 0.2041, + "step": 2315 + }, + { + "epoch": 2.86, + "learning_rate": 1.2104291630576136e-07, + "loss": 0.2103, + "step": 2316 + }, + { + "epoch": 2.86, + "learning_rate": 1.1898674455685045e-07, + "loss": 0.2272, + "step": 2317 + }, + { + "epoch": 2.86, + "learning_rate": 1.1694808244552824e-07, + "loss": 0.2036, + "step": 2318 + }, + { + "epoch": 2.86, + "learning_rate": 1.1492693358440276e-07, + "loss": 0.2583, + "step": 2319 + }, + { + "epoch": 2.86, + "learning_rate": 1.1292330155505016e-07, + "loss": 0.2214, + "step": 2320 + }, + { + "epoch": 2.86, + "learning_rate": 1.109371899080025e-07, + "loss": 0.2656, + "step": 2321 + }, + { + "epoch": 2.86, + "learning_rate": 1.0896860216274563e-07, + "loss": 0.2102, + "step": 2322 + }, + { + "epoch": 2.86, + "learning_rate": 1.0701754180771462e-07, + "loss": 0.2487, + "step": 2323 + }, + { + "epoch": 2.86, + "learning_rate": 1.0508401230028387e-07, + "loss": 0.2849, + "step": 2324 + }, + { + "epoch": 2.87, + "learning_rate": 1.0316801706676038e-07, + "loss": 0.1942, + "step": 2325 + }, + { + "epoch": 2.87, + "learning_rate": 1.0126955950238271e-07, + "loss": 0.2409, + "step": 2326 + }, + { + "epoch": 2.87, + "learning_rate": 9.93886429713098e-08, + "loss": 0.2169, + "step": 2327 + }, + { + "epoch": 2.87, + "learning_rate": 9.752527080661655e-08, + "loss": 0.2442, + "step": 2328 + }, + { + "epoch": 2.87, + "learning_rate": 9.56794463102917e-08, + "loss": 0.2263, + "step": 2329 + }, + { + "epoch": 2.87, + "learning_rate": 9.38511727532232e-08, + "loss": 0.2038, + "step": 2330 + }, + { + "epoch": 2.87, + "learning_rate": 9.204045337520395e-08, + "loss": 0.3241, + "step": 2331 + }, + { + "epoch": 2.87, + "learning_rate": 9.024729138491506e-08, + "loss": 0.1977, + "step": 2332 + }, + { + "epoch": 2.88, + "learning_rate": 8.847168995992916e-08, + "loss": 0.2846, + "step": 2333 + }, + { + "epoch": 2.88, + "learning_rate": 8.671365224669492e-08, + "loss": 0.2333, + "step": 2334 + }, + { + "epoch": 2.88, + "learning_rate": 8.497318136054477e-08, + "loss": 0.2191, + "step": 2335 + }, + { + "epoch": 2.88, + "learning_rate": 8.325028038567606e-08, + "loss": 0.2557, + "step": 2336 + }, + { + "epoch": 2.88, + "learning_rate": 8.154495237515436e-08, + "loss": 0.1953, + "step": 2337 + }, + { + "epoch": 2.88, + "learning_rate": 7.985720035090239e-08, + "loss": 0.2103, + "step": 2338 + }, + { + "epoch": 2.88, + "learning_rate": 7.818702730370109e-08, + "loss": 0.2481, + "step": 2339 + }, + { + "epoch": 2.88, + "learning_rate": 7.653443619317747e-08, + "loss": 0.2608, + "step": 2340 + }, + { + "epoch": 2.89, + "learning_rate": 7.489942994780452e-08, + "loss": 0.1832, + "step": 2341 + }, + { + "epoch": 2.89, + "learning_rate": 7.328201146489244e-08, + "loss": 0.251, + "step": 2342 + }, + { + "epoch": 2.89, + "learning_rate": 7.16821836105841e-08, + "loss": 0.2721, + "step": 2343 + }, + { + "epoch": 2.89, + "learning_rate": 7.009994921985508e-08, + "loss": 0.2149, + "step": 2344 + }, + { + "epoch": 2.89, + "learning_rate": 6.853531109650147e-08, + "loss": 0.1865, + "step": 2345 + }, + { + "epoch": 2.89, + "learning_rate": 6.698827201313762e-08, + "loss": 0.1614, + "step": 2346 + }, + { + "epoch": 2.89, + "learning_rate": 6.545883471119174e-08, + "loss": 0.2759, + "step": 2347 + }, + { + "epoch": 2.89, + "learning_rate": 6.394700190090252e-08, + "loss": 0.235, + "step": 2348 + }, + { + "epoch": 2.9, + "learning_rate": 6.245277626131142e-08, + "loss": 0.2051, + "step": 2349 + }, + { + "epoch": 2.9, + "learning_rate": 6.097616044025922e-08, + "loss": 0.2769, + "step": 2350 + }, + { + "epoch": 2.9, + "learning_rate": 5.951715705437955e-08, + "loss": 0.2696, + "step": 2351 + }, + { + "epoch": 2.9, + "learning_rate": 5.807576868909981e-08, + "loss": 0.2615, + "step": 2352 + }, + { + "epoch": 2.9, + "learning_rate": 5.665199789862907e-08, + "loss": 0.3078, + "step": 2353 + }, + { + "epoch": 2.9, + "learning_rate": 5.5245847205959156e-08, + "loss": 0.2206, + "step": 2354 + }, + { + "epoch": 2.9, + "learning_rate": 5.3857319102857967e-08, + "loss": 0.2809, + "step": 2355 + }, + { + "epoch": 2.9, + "learning_rate": 5.248641604986393e-08, + "loss": 0.2189, + "step": 2356 + }, + { + "epoch": 2.91, + "learning_rate": 5.113314047628493e-08, + "loss": 0.1716, + "step": 2357 + }, + { + "epoch": 2.91, + "learning_rate": 4.979749478019158e-08, + "loss": 0.2309, + "step": 2358 + }, + { + "epoch": 2.91, + "learning_rate": 4.8479481328413955e-08, + "loss": 0.2271, + "step": 2359 + }, + { + "epoch": 2.91, + "learning_rate": 4.7179102456533786e-08, + "loss": 0.1993, + "step": 2360 + }, + { + "epoch": 2.91, + "learning_rate": 4.589636046888779e-08, + "loss": 0.2607, + "step": 2361 + }, + { + "epoch": 2.91, + "learning_rate": 4.463125763855769e-08, + "loss": 0.2393, + "step": 2362 + }, + { + "epoch": 2.91, + "learning_rate": 4.338379620736577e-08, + "loss": 0.2625, + "step": 2363 + }, + { + "epoch": 2.91, + "learning_rate": 4.2153978385875985e-08, + "loss": 0.2392, + "step": 2364 + }, + { + "epoch": 2.92, + "learning_rate": 4.094180635338396e-08, + "loss": 0.2291, + "step": 2365 + }, + { + "epoch": 2.92, + "learning_rate": 3.974728225791924e-08, + "loss": 0.2086, + "step": 2366 + }, + { + "epoch": 2.92, + "learning_rate": 3.8570408216236366e-08, + "loss": 0.2187, + "step": 2367 + }, + { + "epoch": 2.92, + "learning_rate": 3.741118631381269e-08, + "loss": 0.2433, + "step": 2368 + }, + { + "epoch": 2.92, + "learning_rate": 3.626961860484723e-08, + "loss": 0.236, + "step": 2369 + }, + { + "epoch": 2.92, + "learning_rate": 3.514570711225296e-08, + "loss": 0.2713, + "step": 2370 + }, + { + "epoch": 2.92, + "learning_rate": 3.403945382765561e-08, + "loss": 0.2253, + "step": 2371 + }, + { + "epoch": 2.92, + "learning_rate": 3.295086071139153e-08, + "loss": 0.2635, + "step": 2372 + }, + { + "epoch": 2.93, + "learning_rate": 3.187992969249876e-08, + "loss": 0.2139, + "step": 2373 + }, + { + "epoch": 2.93, + "learning_rate": 3.082666266872036e-08, + "loss": 0.2821, + "step": 2374 + }, + { + "epoch": 2.93, + "learning_rate": 2.9791061506496686e-08, + "loss": 0.2881, + "step": 2375 + }, + { + "epoch": 2.93, + "learning_rate": 2.8773128040964214e-08, + "loss": 0.2413, + "step": 2376 + }, + { + "epoch": 2.93, + "learning_rate": 2.7772864075950036e-08, + "loss": 0.2229, + "step": 2377 + }, + { + "epoch": 2.93, + "learning_rate": 2.6790271383970723e-08, + "loss": 0.2572, + "step": 2378 + }, + { + "epoch": 2.93, + "learning_rate": 2.5825351706227908e-08, + "loss": 0.2878, + "step": 2379 + }, + { + "epoch": 2.93, + "learning_rate": 2.4878106752607157e-08, + "loss": 0.2954, + "step": 2380 + }, + { + "epoch": 2.94, + "learning_rate": 2.3948538201672423e-08, + "loss": 0.3295, + "step": 2381 + }, + { + "epoch": 2.94, + "learning_rate": 2.303664770066494e-08, + "loss": 0.2305, + "step": 2382 + }, + { + "epoch": 2.94, + "learning_rate": 2.2142436865499884e-08, + "loss": 0.2351, + "step": 2383 + }, + { + "epoch": 2.94, + "learning_rate": 2.1265907280759725e-08, + "loss": 0.2028, + "step": 2384 + }, + { + "epoch": 2.94, + "learning_rate": 2.040706049970087e-08, + "loss": 0.2631, + "step": 2385 + }, + { + "epoch": 2.94, + "learning_rate": 1.9565898044239252e-08, + "loss": 0.2132, + "step": 2386 + }, + { + "epoch": 2.94, + "learning_rate": 1.8742421404956968e-08, + "loss": 0.2103, + "step": 2387 + }, + { + "epoch": 2.94, + "learning_rate": 1.7936632041094527e-08, + "loss": 0.2305, + "step": 2388 + }, + { + "epoch": 2.95, + "learning_rate": 1.7148531380550836e-08, + "loss": 0.3106, + "step": 2389 + }, + { + "epoch": 2.95, + "learning_rate": 1.6378120819877665e-08, + "loss": 0.1841, + "step": 2390 + }, + { + "epoch": 2.95, + "learning_rate": 1.562540172427962e-08, + "loss": 0.2225, + "step": 2391 + }, + { + "epoch": 2.95, + "learning_rate": 1.4890375427613069e-08, + "loss": 0.2387, + "step": 2392 + }, + { + "epoch": 2.95, + "learning_rate": 1.4173043232380557e-08, + "loss": 0.2072, + "step": 2393 + }, + { + "epoch": 2.95, + "learning_rate": 1.3473406409728607e-08, + "loss": 0.2792, + "step": 2394 + }, + { + "epoch": 2.95, + "learning_rate": 1.2791466199447711e-08, + "loss": 0.2875, + "step": 2395 + }, + { + "epoch": 2.95, + "learning_rate": 1.2127223809970112e-08, + "loss": 0.2378, + "step": 2396 + }, + { + "epoch": 2.95, + "learning_rate": 1.1480680418365364e-08, + "loss": 0.2001, + "step": 2397 + }, + { + "epoch": 2.96, + "learning_rate": 1.0851837170340329e-08, + "loss": 0.2492, + "step": 2398 + }, + { + "epoch": 2.96, + "learning_rate": 1.0240695180234739e-08, + "loss": 0.3008, + "step": 2399 + }, + { + "epoch": 2.96, + "learning_rate": 9.647255531023415e-09, + "loss": 0.2488, + "step": 2400 + }, + { + "epoch": 2.96, + "learning_rate": 9.071519274308494e-09, + "loss": 0.2591, + "step": 2401 + }, + { + "epoch": 2.96, + "learning_rate": 8.513487430324985e-09, + "loss": 0.2423, + "step": 2402 + }, + { + "epoch": 2.96, + "learning_rate": 7.973160987931883e-09, + "loss": 0.2322, + "step": 2403 + }, + { + "epoch": 2.96, + "learning_rate": 7.450540904612169e-09, + "loss": 0.2481, + "step": 2404 + }, + { + "epoch": 2.96, + "learning_rate": 6.945628106477254e-09, + "loss": 0.2194, + "step": 2405 + }, + { + "epoch": 2.97, + "learning_rate": 6.4584234882547616e-09, + "loss": 0.2717, + "step": 2406 + }, + { + "epoch": 2.97, + "learning_rate": 5.988927913295195e-09, + "loss": 0.2822, + "step": 2407 + }, + { + "epoch": 2.97, + "learning_rate": 5.537142213569713e-09, + "loss": 0.246, + "step": 2408 + }, + { + "epoch": 2.97, + "learning_rate": 5.1030671896623585e-09, + "loss": 0.2922, + "step": 2409 + }, + { + "epoch": 2.97, + "learning_rate": 4.6867036107767215e-09, + "loss": 0.2088, + "step": 2410 + }, + { + "epoch": 2.97, + "learning_rate": 4.288052214727057e-09, + "loss": 0.2855, + "step": 2411 + }, + { + "epoch": 2.97, + "learning_rate": 3.907113707946053e-09, + "loss": 0.1692, + "step": 2412 + }, + { + "epoch": 2.97, + "learning_rate": 3.5438887654737355e-09, + "loss": 0.2182, + "step": 2413 + }, + { + "epoch": 2.98, + "learning_rate": 3.198378030963012e-09, + "loss": 0.208, + "step": 2414 + }, + { + "epoch": 2.98, + "learning_rate": 2.870582116676346e-09, + "loss": 0.2157, + "step": 2415 + }, + { + "epoch": 2.98, + "learning_rate": 2.5605016034813134e-09, + "loss": 0.2076, + "step": 2416 + }, + { + "epoch": 2.98, + "learning_rate": 2.268137040859486e-09, + "loss": 0.2028, + "step": 2417 + }, + { + "epoch": 2.98, + "learning_rate": 1.993488946891997e-09, + "loss": 0.307, + "step": 2418 + }, + { + "epoch": 2.98, + "learning_rate": 1.7365578082706447e-09, + "loss": 0.3018, + "step": 2419 + }, + { + "epoch": 2.98, + "learning_rate": 1.4973440802890094e-09, + "loss": 0.2639, + "step": 2420 + }, + { + "epoch": 2.98, + "learning_rate": 1.275848186845785e-09, + "loss": 0.2743, + "step": 2421 + }, + { + "epoch": 2.99, + "learning_rate": 1.0720705204414483e-09, + "loss": 0.2223, + "step": 2422 + }, + { + "epoch": 2.99, + "learning_rate": 8.860114421826993e-10, + "loss": 0.2473, + "step": 2423 + }, + { + "epoch": 2.99, + "learning_rate": 7.176712817724696e-10, + "loss": 0.3147, + "step": 2424 + }, + { + "epoch": 2.99, + "learning_rate": 5.670503375188041e-10, + "loss": 0.2396, + "step": 2425 + }, + { + "epoch": 2.99, + "learning_rate": 4.3414887633042023e-10, + "loss": 0.263, + "step": 2426 + }, + { + "epoch": 2.99, + "learning_rate": 3.1896713371337706e-10, + "loss": 0.3078, + "step": 2427 + }, + { + "epoch": 2.99, + "learning_rate": 2.2150531377551633e-10, + "loss": 0.2524, + "step": 2428 + }, + { + "epoch": 2.99, + "learning_rate": 1.4176358922535216e-10, + "loss": 0.2462, + "step": 2429 + }, + { + "epoch": 3.0, + "learning_rate": 7.97421013687405e-11, + "loss": 0.2789, + "step": 2430 + }, + { + "epoch": 3.0, + "learning_rate": 3.544096010998921e-11, + "loss": 0.2202, + "step": 2431 + }, + { + "epoch": 3.0, + "learning_rate": 8.860243952968361e-12, + "loss": 0.2295, + "step": 2432 + }, + { + "epoch": 3.0, + "learning_rate": 0.0, + "loss": 0.294, + "step": 2433 + }, + { + "epoch": 3.0, + "step": 2433, + "total_flos": 1.9269628383880806e+17, + "train_loss": 0.606949764915175, + "train_runtime": 9820.3372, + "train_samples_per_second": 15.868, + "train_steps_per_second": 0.248 + } + ], + "max_steps": 2433, + "num_train_epochs": 3, + "total_flos": 1.9269628383880806e+17, + "trial_name": null, + "trial_params": null +}