diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,26751 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.964060264287518, + "eval_steps": 500, + "global_step": 4455, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.7251, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.684, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 6e-06, + "loss": 0.7066, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6894, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 0.7617, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.2e-05, + "loss": 0.6869, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 1.4e-05, + "loss": 0.6697, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7293, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 1.8e-05, + "loss": 0.6381, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 0.6163, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 1.9999998596668246e-05, + "loss": 0.6307, + "step": 11 + }, + { + "epoch": 0.01, + "learning_rate": 1.9999994386673375e-05, + "loss": 0.6536, + "step": 12 + }, + { + "epoch": 0.01, + "learning_rate": 1.9999987370016564e-05, + "loss": 0.6721, + "step": 13 + }, + { + "epoch": 0.01, + "learning_rate": 1.9999977546699793e-05, + "loss": 0.6227, + "step": 14 + }, + { + "epoch": 0.01, + "learning_rate": 1.9999964916725805e-05, + "loss": 0.6545, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 1.9999949480098158e-05, + "loss": 0.6805, + "step": 16 + }, + { + "epoch": 0.01, + "learning_rate": 1.999993123682118e-05, + "loss": 0.661, + "step": 17 + }, + { + "epoch": 0.01, + "learning_rate": 1.9999910186899984e-05, + "loss": 0.6475, + "step": 18 + }, + { + "epoch": 0.01, + "learning_rate": 1.999988633034049e-05, + "loss": 0.6131, + "step": 19 + }, + { + "epoch": 0.01, + "learning_rate": 1.9999859667149386e-05, + "loss": 0.6337, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 1.9999830197334157e-05, + "loss": 0.7843, + "step": 21 + }, + { + "epoch": 0.01, + "learning_rate": 1.9999797920903076e-05, + "loss": 0.6236, + "step": 22 + }, + { + "epoch": 0.02, + "learning_rate": 1.9999762837865202e-05, + "loss": 0.644, + "step": 23 + }, + { + "epoch": 0.02, + "learning_rate": 1.999972494823038e-05, + "loss": 0.6497, + "step": 24 + }, + { + "epoch": 0.02, + "learning_rate": 1.9999684252009243e-05, + "loss": 0.6422, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 1.9999640749213215e-05, + "loss": 0.622, + "step": 26 + }, + { + "epoch": 0.02, + "learning_rate": 1.9999594439854504e-05, + "loss": 0.6153, + "step": 27 + }, + { + "epoch": 0.02, + "learning_rate": 1.999954532394611e-05, + "loss": 0.6442, + "step": 28 + }, + { + "epoch": 0.02, + "learning_rate": 1.999949340150182e-05, + "loss": 0.62, + "step": 29 + }, + { + "epoch": 0.02, + "learning_rate": 1.9999438672536202e-05, + "loss": 0.625, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 1.9999381137064617e-05, + "loss": 0.5887, + "step": 31 + }, + { + "epoch": 0.02, + "learning_rate": 1.9999320795103215e-05, + "loss": 0.5998, + "step": 32 + }, + { + "epoch": 0.02, + "learning_rate": 1.9999257646668936e-05, + "loss": 0.5878, + "step": 33 + }, + { + "epoch": 0.02, + "learning_rate": 1.9999191691779494e-05, + "loss": 0.5898, + "step": 34 + }, + { + "epoch": 0.02, + "learning_rate": 1.999912293045341e-05, + "loss": 0.6015, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 1.999905136270998e-05, + "loss": 0.5943, + "step": 36 + }, + { + "epoch": 0.02, + "learning_rate": 1.9998976988569283e-05, + "loss": 0.6411, + "step": 37 + }, + { + "epoch": 0.03, + "learning_rate": 1.9998899808052203e-05, + "loss": 0.6758, + "step": 38 + }, + { + "epoch": 0.03, + "learning_rate": 1.9998819821180398e-05, + "loss": 0.6665, + "step": 39 + }, + { + "epoch": 0.03, + "learning_rate": 1.9998737027976323e-05, + "loss": 0.5868, + "step": 40 + }, + { + "epoch": 0.03, + "learning_rate": 1.9998651428463205e-05, + "loss": 0.6656, + "step": 41 + }, + { + "epoch": 0.03, + "learning_rate": 1.9998563022665078e-05, + "loss": 0.5947, + "step": 42 + }, + { + "epoch": 0.03, + "learning_rate": 1.999847181060675e-05, + "loss": 0.6319, + "step": 43 + }, + { + "epoch": 0.03, + "learning_rate": 1.999837779231382e-05, + "loss": 0.5821, + "step": 44 + }, + { + "epoch": 0.03, + "learning_rate": 1.999828096781268e-05, + "loss": 0.6216, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 1.9998181337130503e-05, + "loss": 0.6095, + "step": 46 + }, + { + "epoch": 0.03, + "learning_rate": 1.9998078900295254e-05, + "loss": 0.5949, + "step": 47 + }, + { + "epoch": 0.03, + "learning_rate": 1.999797365733568e-05, + "loss": 0.6879, + "step": 48 + }, + { + "epoch": 0.03, + "learning_rate": 1.999786560828132e-05, + "loss": 0.599, + "step": 49 + }, + { + "epoch": 0.03, + "learning_rate": 1.99977547531625e-05, + "loss": 0.5634, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 1.999764109201034e-05, + "loss": 0.6556, + "step": 51 + }, + { + "epoch": 0.04, + "learning_rate": 1.999752462485673e-05, + "loss": 0.576, + "step": 52 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997405351734365e-05, + "loss": 0.5823, + "step": 53 + }, + { + "epoch": 0.04, + "learning_rate": 1.999728327267672e-05, + "loss": 0.5989, + "step": 54 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997158387718057e-05, + "loss": 0.5854, + "step": 55 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997030696893427e-05, + "loss": 0.6511, + "step": 56 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996900200238668e-05, + "loss": 0.6048, + "step": 57 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996766897790412e-05, + "loss": 0.5831, + "step": 58 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996630789586065e-05, + "loss": 0.565, + "step": 59 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996491875663833e-05, + "loss": 0.5952, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996350156062697e-05, + "loss": 0.5649, + "step": 61 + }, + { + "epoch": 0.04, + "learning_rate": 1.999620563082244e-05, + "loss": 0.6088, + "step": 62 + }, + { + "epoch": 0.04, + "learning_rate": 1.999605829998363e-05, + "loss": 0.5991, + "step": 63 + }, + { + "epoch": 0.04, + "learning_rate": 1.9995908163587607e-05, + "loss": 0.621, + "step": 64 + }, + { + "epoch": 0.04, + "learning_rate": 1.999575522167651e-05, + "loss": 0.6172, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 1.9995599474293272e-05, + "loss": 0.6655, + "step": 66 + }, + { + "epoch": 0.05, + "learning_rate": 1.9995440921481605e-05, + "loss": 0.6356, + "step": 67 + }, + { + "epoch": 0.05, + "learning_rate": 1.9995279563286004e-05, + "loss": 0.5898, + "step": 68 + }, + { + "epoch": 0.05, + "learning_rate": 1.999511539975176e-05, + "loss": 0.5648, + "step": 69 + }, + { + "epoch": 0.05, + "learning_rate": 1.9994948430924944e-05, + "loss": 0.629, + "step": 70 + }, + { + "epoch": 0.05, + "learning_rate": 1.9994778656852428e-05, + "loss": 0.6588, + "step": 71 + }, + { + "epoch": 0.05, + "learning_rate": 1.999460607758185e-05, + "loss": 0.5803, + "step": 72 + }, + { + "epoch": 0.05, + "learning_rate": 1.9994430693161662e-05, + "loss": 0.5956, + "step": 73 + }, + { + "epoch": 0.05, + "learning_rate": 1.9994252503641076e-05, + "loss": 0.594, + "step": 74 + }, + { + "epoch": 0.05, + "learning_rate": 1.9994071509070104e-05, + "loss": 0.5705, + "step": 75 + }, + { + "epoch": 0.05, + "learning_rate": 1.9993887709499553e-05, + "loss": 0.6173, + "step": 76 + }, + { + "epoch": 0.05, + "learning_rate": 1.9993701104981003e-05, + "loss": 0.6115, + "step": 77 + }, + { + "epoch": 0.05, + "learning_rate": 1.999351169556683e-05, + "loss": 0.6033, + "step": 78 + }, + { + "epoch": 0.05, + "learning_rate": 1.9993319481310195e-05, + "loss": 0.6066, + "step": 79 + }, + { + "epoch": 0.05, + "learning_rate": 1.9993124462265045e-05, + "loss": 0.5697, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 1.9992926638486118e-05, + "loss": 0.6293, + "step": 81 + }, + { + "epoch": 0.06, + "learning_rate": 1.9992726010028935e-05, + "loss": 0.6349, + "step": 82 + }, + { + "epoch": 0.06, + "learning_rate": 1.9992522576949803e-05, + "loss": 0.5912, + "step": 83 + }, + { + "epoch": 0.06, + "learning_rate": 1.999231633930582e-05, + "loss": 0.6073, + "step": 84 + }, + { + "epoch": 0.06, + "learning_rate": 1.9992107297154872e-05, + "loss": 0.5638, + "step": 85 + }, + { + "epoch": 0.06, + "learning_rate": 1.999189545055563e-05, + "loss": 0.587, + "step": 86 + }, + { + "epoch": 0.06, + "learning_rate": 1.999168079956755e-05, + "loss": 0.6084, + "step": 87 + }, + { + "epoch": 0.06, + "learning_rate": 1.999146334425088e-05, + "loss": 0.5811, + "step": 88 + }, + { + "epoch": 0.06, + "learning_rate": 1.999124308466665e-05, + "loss": 0.5603, + "step": 89 + }, + { + "epoch": 0.06, + "learning_rate": 1.9991020020876676e-05, + "loss": 0.6015, + "step": 90 + }, + { + "epoch": 0.06, + "learning_rate": 1.9990794152943574e-05, + "loss": 0.63, + "step": 91 + }, + { + "epoch": 0.06, + "learning_rate": 1.9990565480930734e-05, + "loss": 0.6245, + "step": 92 + }, + { + "epoch": 0.06, + "learning_rate": 1.999033400490233e-05, + "loss": 0.626, + "step": 93 + }, + { + "epoch": 0.06, + "learning_rate": 1.9990099724923337e-05, + "loss": 0.5502, + "step": 94 + }, + { + "epoch": 0.06, + "learning_rate": 1.9989862641059504e-05, + "loss": 0.6085, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 1.998962275337738e-05, + "loss": 0.6174, + "step": 96 + }, + { + "epoch": 0.07, + "learning_rate": 1.998938006194429e-05, + "loss": 0.6137, + "step": 97 + }, + { + "epoch": 0.07, + "learning_rate": 1.9989134566828344e-05, + "loss": 0.5377, + "step": 98 + }, + { + "epoch": 0.07, + "learning_rate": 1.9988886268098446e-05, + "loss": 0.6077, + "step": 99 + }, + { + "epoch": 0.07, + "learning_rate": 1.9988635165824293e-05, + "loss": 0.5585, + "step": 100 + }, + { + "epoch": 0.07, + "learning_rate": 1.998838126007635e-05, + "loss": 0.6025, + "step": 101 + }, + { + "epoch": 0.07, + "learning_rate": 1.998812455092589e-05, + "loss": 0.6041, + "step": 102 + }, + { + "epoch": 0.07, + "learning_rate": 1.9987865038444955e-05, + "loss": 0.5728, + "step": 103 + }, + { + "epoch": 0.07, + "learning_rate": 1.9987602722706387e-05, + "loss": 0.5588, + "step": 104 + }, + { + "epoch": 0.07, + "learning_rate": 1.9987337603783806e-05, + "loss": 0.5903, + "step": 105 + }, + { + "epoch": 0.07, + "learning_rate": 1.998706968175162e-05, + "loss": 0.5814, + "step": 106 + }, + { + "epoch": 0.07, + "learning_rate": 1.9986798956685033e-05, + "loss": 0.6156, + "step": 107 + }, + { + "epoch": 0.07, + "learning_rate": 1.9986525428660018e-05, + "loss": 0.5985, + "step": 108 + }, + { + "epoch": 0.07, + "learning_rate": 1.998624909775335e-05, + "loss": 0.6005, + "step": 109 + }, + { + "epoch": 0.07, + "learning_rate": 1.998596996404259e-05, + "loss": 0.576, + "step": 110 + }, + { + "epoch": 0.07, + "learning_rate": 1.998568802760608e-05, + "loss": 0.5777, + "step": 111 + }, + { + "epoch": 0.08, + "learning_rate": 1.998540328852294e-05, + "loss": 0.6087, + "step": 112 + }, + { + "epoch": 0.08, + "learning_rate": 1.99851157468731e-05, + "loss": 0.5819, + "step": 113 + }, + { + "epoch": 0.08, + "learning_rate": 1.9984825402737262e-05, + "loss": 0.6039, + "step": 114 + }, + { + "epoch": 0.08, + "learning_rate": 1.9984532256196905e-05, + "loss": 0.6003, + "step": 115 + }, + { + "epoch": 0.08, + "learning_rate": 1.9984236307334313e-05, + "loss": 0.6095, + "step": 116 + }, + { + "epoch": 0.08, + "learning_rate": 1.998393755623255e-05, + "loss": 0.6607, + "step": 117 + }, + { + "epoch": 0.08, + "learning_rate": 1.9983636002975462e-05, + "loss": 0.6127, + "step": 118 + }, + { + "epoch": 0.08, + "learning_rate": 1.9983331647647687e-05, + "loss": 0.6099, + "step": 119 + }, + { + "epoch": 0.08, + "learning_rate": 1.9983024490334645e-05, + "loss": 0.6032, + "step": 120 + }, + { + "epoch": 0.08, + "learning_rate": 1.998271453112255e-05, + "loss": 0.5811, + "step": 121 + }, + { + "epoch": 0.08, + "learning_rate": 1.998240177009839e-05, + "loss": 0.6107, + "step": 122 + }, + { + "epoch": 0.08, + "learning_rate": 1.998208620734995e-05, + "loss": 0.5947, + "step": 123 + }, + { + "epoch": 0.08, + "learning_rate": 1.99817678429658e-05, + "loss": 0.5572, + "step": 124 + }, + { + "epoch": 0.08, + "learning_rate": 1.998144667703529e-05, + "loss": 0.5635, + "step": 125 + }, + { + "epoch": 0.08, + "learning_rate": 1.9981122709648558e-05, + "loss": 0.5965, + "step": 126 + }, + { + "epoch": 0.09, + "learning_rate": 1.9980795940896544e-05, + "loss": 0.6202, + "step": 127 + }, + { + "epoch": 0.09, + "learning_rate": 1.9980466370870947e-05, + "loss": 0.556, + "step": 128 + }, + { + "epoch": 0.09, + "learning_rate": 1.9980133999664272e-05, + "loss": 0.6325, + "step": 129 + }, + { + "epoch": 0.09, + "learning_rate": 1.99797988273698e-05, + "loss": 0.6009, + "step": 130 + }, + { + "epoch": 0.09, + "learning_rate": 1.997946085408161e-05, + "loss": 0.6068, + "step": 131 + }, + { + "epoch": 0.09, + "learning_rate": 1.9979120079894558e-05, + "loss": 0.5491, + "step": 132 + }, + { + "epoch": 0.09, + "learning_rate": 1.9978776504904282e-05, + "loss": 0.5876, + "step": 133 + }, + { + "epoch": 0.09, + "learning_rate": 1.997843012920722e-05, + "loss": 0.5731, + "step": 134 + }, + { + "epoch": 0.09, + "learning_rate": 1.997808095290058e-05, + "loss": 0.5959, + "step": 135 + }, + { + "epoch": 0.09, + "learning_rate": 1.9977728976082367e-05, + "loss": 0.5916, + "step": 136 + }, + { + "epoch": 0.09, + "learning_rate": 1.9977374198851374e-05, + "loss": 0.5775, + "step": 137 + }, + { + "epoch": 0.09, + "learning_rate": 1.9977016621307167e-05, + "loss": 0.6175, + "step": 138 + }, + { + "epoch": 0.09, + "learning_rate": 1.9976656243550115e-05, + "loss": 0.6082, + "step": 139 + }, + { + "epoch": 0.09, + "learning_rate": 1.9976293065681355e-05, + "loss": 0.5714, + "step": 140 + }, + { + "epoch": 0.09, + "learning_rate": 1.9975927087802822e-05, + "loss": 0.5886, + "step": 141 + }, + { + "epoch": 0.1, + "learning_rate": 1.9975558310017238e-05, + "loss": 0.5499, + "step": 142 + }, + { + "epoch": 0.1, + "learning_rate": 1.9975186732428102e-05, + "loss": 0.5873, + "step": 143 + }, + { + "epoch": 0.1, + "learning_rate": 1.99748123551397e-05, + "loss": 0.588, + "step": 144 + }, + { + "epoch": 0.1, + "learning_rate": 1.9974435178257114e-05, + "loss": 0.5705, + "step": 145 + }, + { + "epoch": 0.1, + "learning_rate": 1.99740552018862e-05, + "loss": 0.5878, + "step": 146 + }, + { + "epoch": 0.1, + "learning_rate": 1.997367242613361e-05, + "loss": 0.5783, + "step": 147 + }, + { + "epoch": 0.1, + "learning_rate": 1.997328685110677e-05, + "loss": 0.6105, + "step": 148 + }, + { + "epoch": 0.1, + "learning_rate": 1.9972898476913906e-05, + "loss": 0.5822, + "step": 149 + }, + { + "epoch": 0.1, + "learning_rate": 1.997250730366401e-05, + "loss": 0.5816, + "step": 150 + }, + { + "epoch": 0.1, + "learning_rate": 1.9972113331466883e-05, + "loss": 0.6001, + "step": 151 + }, + { + "epoch": 0.1, + "learning_rate": 1.997171656043309e-05, + "loss": 0.567, + "step": 152 + }, + { + "epoch": 0.1, + "learning_rate": 1.9971316990673997e-05, + "loss": 0.5925, + "step": 153 + }, + { + "epoch": 0.1, + "learning_rate": 1.997091462230175e-05, + "loss": 0.5198, + "step": 154 + }, + { + "epoch": 0.1, + "learning_rate": 1.997050945542928e-05, + "loss": 0.6043, + "step": 155 + }, + { + "epoch": 0.11, + "learning_rate": 1.99701014901703e-05, + "loss": 0.5603, + "step": 156 + }, + { + "epoch": 0.11, + "learning_rate": 1.996969072663931e-05, + "loss": 0.544, + "step": 157 + }, + { + "epoch": 0.11, + "learning_rate": 1.9969277164951612e-05, + "loss": 0.6085, + "step": 158 + }, + { + "epoch": 0.11, + "learning_rate": 1.9968860805223264e-05, + "loss": 0.5777, + "step": 159 + }, + { + "epoch": 0.11, + "learning_rate": 1.9968441647571124e-05, + "loss": 0.5832, + "step": 160 + }, + { + "epoch": 0.11, + "learning_rate": 1.996801969211285e-05, + "loss": 0.5975, + "step": 161 + }, + { + "epoch": 0.11, + "learning_rate": 1.9967594938966856e-05, + "loss": 0.5982, + "step": 162 + }, + { + "epoch": 0.11, + "learning_rate": 1.9967167388252358e-05, + "loss": 0.5485, + "step": 163 + }, + { + "epoch": 0.11, + "learning_rate": 1.9966737040089362e-05, + "loss": 0.5663, + "step": 164 + }, + { + "epoch": 0.11, + "learning_rate": 1.9966303894598645e-05, + "loss": 0.5549, + "step": 165 + }, + { + "epoch": 0.11, + "learning_rate": 1.9965867951901783e-05, + "loss": 0.5951, + "step": 166 + }, + { + "epoch": 0.11, + "learning_rate": 1.996542921212113e-05, + "loss": 0.5974, + "step": 167 + }, + { + "epoch": 0.11, + "learning_rate": 1.996498767537982e-05, + "loss": 0.597, + "step": 168 + }, + { + "epoch": 0.11, + "learning_rate": 1.9964543341801778e-05, + "loss": 0.5683, + "step": 169 + }, + { + "epoch": 0.11, + "learning_rate": 1.996409621151172e-05, + "loss": 0.5452, + "step": 170 + }, + { + "epoch": 0.12, + "learning_rate": 1.9963646284635134e-05, + "loss": 0.6023, + "step": 171 + }, + { + "epoch": 0.12, + "learning_rate": 1.99631935612983e-05, + "loss": 0.598, + "step": 172 + }, + { + "epoch": 0.12, + "learning_rate": 1.9962738041628286e-05, + "loss": 0.6609, + "step": 173 + }, + { + "epoch": 0.12, + "learning_rate": 1.996227972575294e-05, + "loss": 0.5362, + "step": 174 + }, + { + "epoch": 0.12, + "learning_rate": 1.9961818613800892e-05, + "loss": 0.6304, + "step": 175 + }, + { + "epoch": 0.12, + "learning_rate": 1.9961354705901567e-05, + "loss": 0.573, + "step": 176 + }, + { + "epoch": 0.12, + "learning_rate": 1.996088800218516e-05, + "loss": 0.585, + "step": 177 + }, + { + "epoch": 0.12, + "learning_rate": 1.9960418502782663e-05, + "loss": 0.5762, + "step": 178 + }, + { + "epoch": 0.12, + "learning_rate": 1.995994620782585e-05, + "loss": 0.6077, + "step": 179 + }, + { + "epoch": 0.12, + "learning_rate": 1.995947111744728e-05, + "loss": 0.5582, + "step": 180 + }, + { + "epoch": 0.12, + "learning_rate": 1.9958993231780294e-05, + "loss": 0.5809, + "step": 181 + }, + { + "epoch": 0.12, + "learning_rate": 1.9958512550959013e-05, + "loss": 0.5751, + "step": 182 + }, + { + "epoch": 0.12, + "learning_rate": 1.995802907511835e-05, + "loss": 0.6164, + "step": 183 + }, + { + "epoch": 0.12, + "learning_rate": 1.9957542804394008e-05, + "loss": 0.5551, + "step": 184 + }, + { + "epoch": 0.12, + "learning_rate": 1.995705373892246e-05, + "loss": 0.5957, + "step": 185 + }, + { + "epoch": 0.13, + "learning_rate": 1.9956561878840967e-05, + "loss": 0.5508, + "step": 186 + }, + { + "epoch": 0.13, + "learning_rate": 1.9956067224287585e-05, + "loss": 0.5901, + "step": 187 + }, + { + "epoch": 0.13, + "learning_rate": 1.9955569775401144e-05, + "loss": 0.5543, + "step": 188 + }, + { + "epoch": 0.13, + "learning_rate": 1.9955069532321257e-05, + "loss": 0.6488, + "step": 189 + }, + { + "epoch": 0.13, + "learning_rate": 1.9954566495188333e-05, + "loss": 0.5791, + "step": 190 + }, + { + "epoch": 0.13, + "learning_rate": 1.9954060664143555e-05, + "loss": 0.5738, + "step": 191 + }, + { + "epoch": 0.13, + "learning_rate": 1.995355203932889e-05, + "loss": 0.6277, + "step": 192 + }, + { + "epoch": 0.13, + "learning_rate": 1.9953040620887093e-05, + "loss": 0.6511, + "step": 193 + }, + { + "epoch": 0.13, + "learning_rate": 1.9952526408961703e-05, + "loss": 0.6071, + "step": 194 + }, + { + "epoch": 0.13, + "learning_rate": 1.995200940369704e-05, + "loss": 0.5688, + "step": 195 + }, + { + "epoch": 0.13, + "learning_rate": 1.9951489605238214e-05, + "loss": 0.5631, + "step": 196 + }, + { + "epoch": 0.13, + "learning_rate": 1.995096701373111e-05, + "loss": 0.5913, + "step": 197 + }, + { + "epoch": 0.13, + "learning_rate": 1.9950441629322407e-05, + "loss": 0.5427, + "step": 198 + }, + { + "epoch": 0.13, + "learning_rate": 1.9949913452159556e-05, + "loss": 0.5748, + "step": 199 + }, + { + "epoch": 0.13, + "learning_rate": 1.9949382482390803e-05, + "loss": 0.5769, + "step": 200 + }, + { + "epoch": 0.14, + "learning_rate": 1.9948848720165178e-05, + "loss": 0.5554, + "step": 201 + }, + { + "epoch": 0.14, + "learning_rate": 1.994831216563248e-05, + "loss": 0.6145, + "step": 202 + }, + { + "epoch": 0.14, + "learning_rate": 1.9947772818943308e-05, + "loss": 0.5717, + "step": 203 + }, + { + "epoch": 0.14, + "learning_rate": 1.9947230680249036e-05, + "loss": 0.5828, + "step": 204 + }, + { + "epoch": 0.14, + "learning_rate": 1.994668574970183e-05, + "loss": 0.5667, + "step": 205 + }, + { + "epoch": 0.14, + "learning_rate": 1.9946138027454624e-05, + "loss": 0.5935, + "step": 206 + }, + { + "epoch": 0.14, + "learning_rate": 1.994558751366115e-05, + "loss": 0.5763, + "step": 207 + }, + { + "epoch": 0.14, + "learning_rate": 1.9945034208475922e-05, + "loss": 0.5388, + "step": 208 + }, + { + "epoch": 0.14, + "learning_rate": 1.994447811205423e-05, + "loss": 0.5679, + "step": 209 + }, + { + "epoch": 0.14, + "learning_rate": 1.9943919224552154e-05, + "loss": 0.5664, + "step": 210 + }, + { + "epoch": 0.14, + "learning_rate": 1.9943357546126548e-05, + "loss": 0.6093, + "step": 211 + }, + { + "epoch": 0.14, + "learning_rate": 1.9942793076935067e-05, + "loss": 0.6113, + "step": 212 + }, + { + "epoch": 0.14, + "learning_rate": 1.9942225817136132e-05, + "loss": 0.5914, + "step": 213 + }, + { + "epoch": 0.14, + "learning_rate": 1.9941655766888956e-05, + "loss": 0.549, + "step": 214 + }, + { + "epoch": 0.14, + "learning_rate": 1.994108292635353e-05, + "loss": 0.5554, + "step": 215 + }, + { + "epoch": 0.15, + "learning_rate": 1.9940507295690632e-05, + "loss": 0.5492, + "step": 216 + }, + { + "epoch": 0.15, + "learning_rate": 1.9939928875061825e-05, + "loss": 0.5747, + "step": 217 + }, + { + "epoch": 0.15, + "learning_rate": 1.9939347664629453e-05, + "loss": 0.5847, + "step": 218 + }, + { + "epoch": 0.15, + "learning_rate": 1.9938763664556634e-05, + "loss": 0.6064, + "step": 219 + }, + { + "epoch": 0.15, + "learning_rate": 1.9938176875007284e-05, + "loss": 0.5568, + "step": 220 + }, + { + "epoch": 0.15, + "learning_rate": 1.9937587296146095e-05, + "loss": 0.5476, + "step": 221 + }, + { + "epoch": 0.15, + "learning_rate": 1.9936994928138543e-05, + "loss": 0.5761, + "step": 222 + }, + { + "epoch": 0.15, + "learning_rate": 1.993639977115088e-05, + "loss": 0.5651, + "step": 223 + }, + { + "epoch": 0.15, + "learning_rate": 1.993580182535015e-05, + "loss": 0.5788, + "step": 224 + }, + { + "epoch": 0.15, + "learning_rate": 1.9935201090904177e-05, + "loss": 0.5649, + "step": 225 + }, + { + "epoch": 0.15, + "learning_rate": 1.9934597567981567e-05, + "loss": 0.564, + "step": 226 + }, + { + "epoch": 0.15, + "learning_rate": 1.9933991256751707e-05, + "loss": 0.5939, + "step": 227 + }, + { + "epoch": 0.15, + "learning_rate": 1.993338215738477e-05, + "loss": 0.5858, + "step": 228 + }, + { + "epoch": 0.15, + "learning_rate": 1.9932770270051706e-05, + "loss": 0.5521, + "step": 229 + }, + { + "epoch": 0.15, + "learning_rate": 1.993215559492426e-05, + "loss": 0.5711, + "step": 230 + }, + { + "epoch": 0.16, + "learning_rate": 1.993153813217494e-05, + "loss": 0.5948, + "step": 231 + }, + { + "epoch": 0.16, + "learning_rate": 1.9930917881977053e-05, + "loss": 0.5852, + "step": 232 + }, + { + "epoch": 0.16, + "learning_rate": 1.9930294844504677e-05, + "loss": 0.6058, + "step": 233 + }, + { + "epoch": 0.16, + "learning_rate": 1.9929669019932686e-05, + "loss": 0.603, + "step": 234 + }, + { + "epoch": 0.16, + "learning_rate": 1.992904040843672e-05, + "loss": 0.5764, + "step": 235 + }, + { + "epoch": 0.16, + "learning_rate": 1.9928409010193213e-05, + "loss": 0.5627, + "step": 236 + }, + { + "epoch": 0.16, + "learning_rate": 1.992777482537938e-05, + "loss": 0.5668, + "step": 237 + }, + { + "epoch": 0.16, + "learning_rate": 1.992713785417321e-05, + "loss": 0.5348, + "step": 238 + }, + { + "epoch": 0.16, + "learning_rate": 1.992649809675348e-05, + "loss": 0.59, + "step": 239 + }, + { + "epoch": 0.16, + "learning_rate": 1.9925855553299755e-05, + "loss": 0.5641, + "step": 240 + }, + { + "epoch": 0.16, + "learning_rate": 1.9925210223992365e-05, + "loss": 0.5417, + "step": 241 + }, + { + "epoch": 0.16, + "learning_rate": 1.992456210901244e-05, + "loss": 0.5635, + "step": 242 + }, + { + "epoch": 0.16, + "learning_rate": 1.992391120854188e-05, + "loss": 0.5527, + "step": 243 + }, + { + "epoch": 0.16, + "learning_rate": 1.9923257522763373e-05, + "loss": 0.5966, + "step": 244 + }, + { + "epoch": 0.16, + "learning_rate": 1.9922601051860386e-05, + "loss": 0.5538, + "step": 245 + }, + { + "epoch": 0.17, + "learning_rate": 1.9921941796017168e-05, + "loss": 0.6451, + "step": 246 + }, + { + "epoch": 0.17, + "learning_rate": 1.992127975541875e-05, + "loss": 0.5891, + "step": 247 + }, + { + "epoch": 0.17, + "learning_rate": 1.9920614930250945e-05, + "loss": 0.5767, + "step": 248 + }, + { + "epoch": 0.17, + "learning_rate": 1.9919947320700346e-05, + "loss": 0.573, + "step": 249 + }, + { + "epoch": 0.17, + "learning_rate": 1.991927692695433e-05, + "loss": 0.5795, + "step": 250 + }, + { + "epoch": 0.17, + "learning_rate": 1.9918603749201053e-05, + "loss": 0.5831, + "step": 251 + }, + { + "epoch": 0.17, + "learning_rate": 1.9917927787629454e-05, + "loss": 0.5439, + "step": 252 + }, + { + "epoch": 0.17, + "learning_rate": 1.9917249042429253e-05, + "loss": 0.5367, + "step": 253 + }, + { + "epoch": 0.17, + "learning_rate": 1.9916567513790946e-05, + "loss": 0.6075, + "step": 254 + }, + { + "epoch": 0.17, + "learning_rate": 1.9915883201905824e-05, + "loss": 0.5493, + "step": 255 + }, + { + "epoch": 0.17, + "learning_rate": 1.9915196106965944e-05, + "loss": 0.5906, + "step": 256 + }, + { + "epoch": 0.17, + "learning_rate": 1.991450622916415e-05, + "loss": 0.5685, + "step": 257 + }, + { + "epoch": 0.17, + "learning_rate": 1.9913813568694076e-05, + "loss": 0.583, + "step": 258 + }, + { + "epoch": 0.17, + "learning_rate": 1.9913118125750115e-05, + "loss": 0.5709, + "step": 259 + }, + { + "epoch": 0.18, + "learning_rate": 1.9912419900527467e-05, + "loss": 0.5741, + "step": 260 + }, + { + "epoch": 0.18, + "learning_rate": 1.9911718893222088e-05, + "loss": 0.5701, + "step": 261 + }, + { + "epoch": 0.18, + "learning_rate": 1.991101510403074e-05, + "loss": 0.5916, + "step": 262 + }, + { + "epoch": 0.18, + "learning_rate": 1.9910308533150946e-05, + "loss": 0.5826, + "step": 263 + }, + { + "epoch": 0.18, + "learning_rate": 1.9909599180781016e-05, + "loss": 0.5845, + "step": 264 + }, + { + "epoch": 0.18, + "learning_rate": 1.9908887047120046e-05, + "loss": 0.5835, + "step": 265 + }, + { + "epoch": 0.18, + "learning_rate": 1.99081721323679e-05, + "loss": 0.611, + "step": 266 + }, + { + "epoch": 0.18, + "learning_rate": 1.9907454436725237e-05, + "loss": 0.5917, + "step": 267 + }, + { + "epoch": 0.18, + "learning_rate": 1.990673396039349e-05, + "loss": 0.5403, + "step": 268 + }, + { + "epoch": 0.18, + "learning_rate": 1.990601070357487e-05, + "loss": 0.5883, + "step": 269 + }, + { + "epoch": 0.18, + "learning_rate": 1.9905284666472374e-05, + "loss": 0.5528, + "step": 270 + }, + { + "epoch": 0.18, + "learning_rate": 1.990455584928977e-05, + "loss": 0.5626, + "step": 271 + }, + { + "epoch": 0.18, + "learning_rate": 1.990382425223162e-05, + "loss": 0.5666, + "step": 272 + }, + { + "epoch": 0.18, + "learning_rate": 1.9903089875503252e-05, + "loss": 0.5835, + "step": 273 + }, + { + "epoch": 0.18, + "learning_rate": 1.9902352719310784e-05, + "loss": 0.5518, + "step": 274 + }, + { + "epoch": 0.19, + "learning_rate": 1.9901612783861114e-05, + "loss": 0.6029, + "step": 275 + }, + { + "epoch": 0.19, + "learning_rate": 1.9900870069361914e-05, + "loss": 0.5343, + "step": 276 + }, + { + "epoch": 0.19, + "learning_rate": 1.9900124576021637e-05, + "loss": 0.5709, + "step": 277 + }, + { + "epoch": 0.19, + "learning_rate": 1.9899376304049516e-05, + "loss": 0.5802, + "step": 278 + }, + { + "epoch": 0.19, + "learning_rate": 1.9898625253655574e-05, + "loss": 0.5744, + "step": 279 + }, + { + "epoch": 0.19, + "learning_rate": 1.9897871425050598e-05, + "loss": 0.6037, + "step": 280 + }, + { + "epoch": 0.19, + "learning_rate": 1.989711481844617e-05, + "loss": 0.568, + "step": 281 + }, + { + "epoch": 0.19, + "learning_rate": 1.9896355434054636e-05, + "loss": 0.5699, + "step": 282 + }, + { + "epoch": 0.19, + "learning_rate": 1.9895593272089135e-05, + "loss": 0.5644, + "step": 283 + }, + { + "epoch": 0.19, + "learning_rate": 1.9894828332763573e-05, + "loss": 0.5546, + "step": 284 + }, + { + "epoch": 0.19, + "learning_rate": 1.989406061629265e-05, + "loss": 0.5781, + "step": 285 + }, + { + "epoch": 0.19, + "learning_rate": 1.989329012289184e-05, + "loss": 0.5749, + "step": 286 + }, + { + "epoch": 0.19, + "learning_rate": 1.989251685277739e-05, + "loss": 0.5518, + "step": 287 + }, + { + "epoch": 0.19, + "learning_rate": 1.989174080616633e-05, + "loss": 0.6277, + "step": 288 + }, + { + "epoch": 0.19, + "learning_rate": 1.9890961983276472e-05, + "loss": 0.569, + "step": 289 + }, + { + "epoch": 0.2, + "learning_rate": 1.9890180384326404e-05, + "loss": 0.5386, + "step": 290 + }, + { + "epoch": 0.2, + "learning_rate": 1.9889396009535496e-05, + "loss": 0.5793, + "step": 291 + }, + { + "epoch": 0.2, + "learning_rate": 1.9888608859123895e-05, + "loss": 0.5583, + "step": 292 + }, + { + "epoch": 0.2, + "learning_rate": 1.9887818933312532e-05, + "loss": 0.5799, + "step": 293 + }, + { + "epoch": 0.2, + "learning_rate": 1.9887026232323105e-05, + "loss": 0.5621, + "step": 294 + }, + { + "epoch": 0.2, + "learning_rate": 1.9886230756378102e-05, + "loss": 0.5738, + "step": 295 + }, + { + "epoch": 0.2, + "learning_rate": 1.9885432505700786e-05, + "loss": 0.5704, + "step": 296 + }, + { + "epoch": 0.2, + "learning_rate": 1.98846314805152e-05, + "loss": 0.6115, + "step": 297 + }, + { + "epoch": 0.2, + "learning_rate": 1.9883827681046167e-05, + "loss": 0.5488, + "step": 298 + }, + { + "epoch": 0.2, + "learning_rate": 1.988302110751928e-05, + "loss": 0.5473, + "step": 299 + }, + { + "epoch": 0.2, + "learning_rate": 1.9882211760160924e-05, + "loss": 0.5707, + "step": 300 + }, + { + "epoch": 0.2, + "learning_rate": 1.9881399639198248e-05, + "loss": 0.5873, + "step": 301 + }, + { + "epoch": 0.2, + "learning_rate": 1.9880584744859195e-05, + "loss": 0.5659, + "step": 302 + }, + { + "epoch": 0.2, + "learning_rate": 1.9879767077372474e-05, + "loss": 0.5966, + "step": 303 + }, + { + "epoch": 0.2, + "learning_rate": 1.9878946636967576e-05, + "loss": 0.5478, + "step": 304 + }, + { + "epoch": 0.21, + "learning_rate": 1.9878123423874777e-05, + "loss": 0.5877, + "step": 305 + }, + { + "epoch": 0.21, + "learning_rate": 1.9877297438325115e-05, + "loss": 0.597, + "step": 306 + }, + { + "epoch": 0.21, + "learning_rate": 1.9876468680550425e-05, + "loss": 0.5697, + "step": 307 + }, + { + "epoch": 0.21, + "learning_rate": 1.9875637150783307e-05, + "loss": 0.5785, + "step": 308 + }, + { + "epoch": 0.21, + "learning_rate": 1.9874802849257148e-05, + "loss": 0.5755, + "step": 309 + }, + { + "epoch": 0.21, + "learning_rate": 1.9873965776206103e-05, + "loss": 0.5772, + "step": 310 + }, + { + "epoch": 0.21, + "learning_rate": 1.9873125931865113e-05, + "loss": 0.5743, + "step": 311 + }, + { + "epoch": 0.21, + "learning_rate": 1.9872283316469893e-05, + "loss": 0.5606, + "step": 312 + }, + { + "epoch": 0.21, + "learning_rate": 1.987143793025694e-05, + "loss": 0.598, + "step": 313 + }, + { + "epoch": 0.21, + "learning_rate": 1.9870589773463517e-05, + "loss": 0.5784, + "step": 314 + }, + { + "epoch": 0.21, + "learning_rate": 1.9869738846327685e-05, + "loss": 0.6226, + "step": 315 + }, + { + "epoch": 0.21, + "learning_rate": 1.9868885149088262e-05, + "loss": 0.571, + "step": 316 + }, + { + "epoch": 0.21, + "learning_rate": 1.9868028681984857e-05, + "loss": 0.5919, + "step": 317 + }, + { + "epoch": 0.21, + "learning_rate": 1.9867169445257848e-05, + "loss": 0.5657, + "step": 318 + }, + { + "epoch": 0.21, + "learning_rate": 1.986630743914839e-05, + "loss": 0.516, + "step": 319 + }, + { + "epoch": 0.22, + "learning_rate": 1.986544266389843e-05, + "loss": 0.568, + "step": 320 + }, + { + "epoch": 0.22, + "learning_rate": 1.9864575119750677e-05, + "loss": 0.603, + "step": 321 + }, + { + "epoch": 0.22, + "learning_rate": 1.9863704806948612e-05, + "loss": 0.5999, + "step": 322 + }, + { + "epoch": 0.22, + "learning_rate": 1.986283172573652e-05, + "loss": 0.5558, + "step": 323 + }, + { + "epoch": 0.22, + "learning_rate": 1.9861955876359428e-05, + "loss": 0.5507, + "step": 324 + }, + { + "epoch": 0.22, + "learning_rate": 1.986107725906317e-05, + "loss": 0.5849, + "step": 325 + }, + { + "epoch": 0.22, + "learning_rate": 1.986019587409434e-05, + "loss": 0.5876, + "step": 326 + }, + { + "epoch": 0.22, + "learning_rate": 1.9859311721700306e-05, + "loss": 0.5929, + "step": 327 + }, + { + "epoch": 0.22, + "learning_rate": 1.9858424802129232e-05, + "loss": 0.5592, + "step": 328 + }, + { + "epoch": 0.22, + "learning_rate": 1.985753511563004e-05, + "loss": 0.5555, + "step": 329 + }, + { + "epoch": 0.22, + "learning_rate": 1.9856642662452437e-05, + "loss": 0.6035, + "step": 330 + }, + { + "epoch": 0.22, + "learning_rate": 1.9855747442846903e-05, + "loss": 0.5433, + "step": 331 + }, + { + "epoch": 0.22, + "learning_rate": 1.9854849457064697e-05, + "loss": 0.5967, + "step": 332 + }, + { + "epoch": 0.22, + "learning_rate": 1.985394870535785e-05, + "loss": 0.5547, + "step": 333 + }, + { + "epoch": 0.22, + "learning_rate": 1.9853045187979184e-05, + "loss": 0.5452, + "step": 334 + }, + { + "epoch": 0.23, + "learning_rate": 1.985213890518227e-05, + "loss": 0.5866, + "step": 335 + }, + { + "epoch": 0.23, + "learning_rate": 1.9851229857221483e-05, + "loss": 0.5557, + "step": 336 + }, + { + "epoch": 0.23, + "learning_rate": 1.9850318044351958e-05, + "loss": 0.5449, + "step": 337 + }, + { + "epoch": 0.23, + "learning_rate": 1.9849403466829607e-05, + "loss": 0.6002, + "step": 338 + }, + { + "epoch": 0.23, + "learning_rate": 1.9848486124911125e-05, + "loss": 0.5653, + "step": 339 + }, + { + "epoch": 0.23, + "learning_rate": 1.984756601885398e-05, + "loss": 0.5496, + "step": 340 + }, + { + "epoch": 0.23, + "learning_rate": 1.9846643148916414e-05, + "loss": 0.5881, + "step": 341 + }, + { + "epoch": 0.23, + "learning_rate": 1.9845717515357443e-05, + "loss": 0.5904, + "step": 342 + }, + { + "epoch": 0.23, + "learning_rate": 1.9844789118436863e-05, + "loss": 0.5717, + "step": 343 + }, + { + "epoch": 0.23, + "learning_rate": 1.9843857958415243e-05, + "loss": 0.5834, + "step": 344 + }, + { + "epoch": 0.23, + "learning_rate": 1.984292403555393e-05, + "loss": 0.564, + "step": 345 + }, + { + "epoch": 0.23, + "learning_rate": 1.9841987350115043e-05, + "loss": 0.5871, + "step": 346 + }, + { + "epoch": 0.23, + "learning_rate": 1.9841047902361475e-05, + "loss": 0.5872, + "step": 347 + }, + { + "epoch": 0.23, + "learning_rate": 1.984010569255691e-05, + "loss": 0.5544, + "step": 348 + }, + { + "epoch": 0.23, + "learning_rate": 1.983916072096578e-05, + "loss": 0.5841, + "step": 349 + }, + { + "epoch": 0.24, + "learning_rate": 1.9838212987853312e-05, + "loss": 0.5427, + "step": 350 + }, + { + "epoch": 0.24, + "learning_rate": 1.9837262493485505e-05, + "loss": 0.5476, + "step": 351 + }, + { + "epoch": 0.24, + "learning_rate": 1.9836309238129127e-05, + "loss": 0.539, + "step": 352 + }, + { + "epoch": 0.24, + "learning_rate": 1.9835353222051728e-05, + "loss": 0.5847, + "step": 353 + }, + { + "epoch": 0.24, + "learning_rate": 1.983439444552163e-05, + "loss": 0.5792, + "step": 354 + }, + { + "epoch": 0.24, + "learning_rate": 1.9833432908807927e-05, + "loss": 0.5919, + "step": 355 + }, + { + "epoch": 0.24, + "learning_rate": 1.983246861218049e-05, + "loss": 0.5298, + "step": 356 + }, + { + "epoch": 0.24, + "learning_rate": 1.9831501555909964e-05, + "loss": 0.594, + "step": 357 + }, + { + "epoch": 0.24, + "learning_rate": 1.9830531740267772e-05, + "loss": 0.5643, + "step": 358 + }, + { + "epoch": 0.24, + "learning_rate": 1.9829559165526103e-05, + "loss": 0.5999, + "step": 359 + }, + { + "epoch": 0.24, + "learning_rate": 1.9828583831957935e-05, + "loss": 0.5895, + "step": 360 + }, + { + "epoch": 0.24, + "learning_rate": 1.9827605739837004e-05, + "loss": 0.5686, + "step": 361 + }, + { + "epoch": 0.24, + "learning_rate": 1.982662488943783e-05, + "loss": 0.612, + "step": 362 + }, + { + "epoch": 0.24, + "learning_rate": 1.9825641281035707e-05, + "loss": 0.5482, + "step": 363 + }, + { + "epoch": 0.25, + "learning_rate": 1.9824654914906694e-05, + "loss": 0.5584, + "step": 364 + }, + { + "epoch": 0.25, + "learning_rate": 1.9823665791327635e-05, + "loss": 0.5635, + "step": 365 + }, + { + "epoch": 0.25, + "learning_rate": 1.9822673910576148e-05, + "loss": 0.6189, + "step": 366 + }, + { + "epoch": 0.25, + "learning_rate": 1.9821679272930614e-05, + "loss": 0.5824, + "step": 367 + }, + { + "epoch": 0.25, + "learning_rate": 1.9820681878670197e-05, + "loss": 0.5695, + "step": 368 + }, + { + "epoch": 0.25, + "learning_rate": 1.981968172807483e-05, + "loss": 0.5872, + "step": 369 + }, + { + "epoch": 0.25, + "learning_rate": 1.9818678821425227e-05, + "loss": 0.556, + "step": 370 + }, + { + "epoch": 0.25, + "learning_rate": 1.9817673159002864e-05, + "loss": 0.5743, + "step": 371 + }, + { + "epoch": 0.25, + "learning_rate": 1.981666474109e-05, + "loss": 0.5832, + "step": 372 + }, + { + "epoch": 0.25, + "learning_rate": 1.981565356796966e-05, + "loss": 0.6243, + "step": 373 + }, + { + "epoch": 0.25, + "learning_rate": 1.981463963992565e-05, + "loss": 0.5777, + "step": 374 + }, + { + "epoch": 0.25, + "learning_rate": 1.981362295724255e-05, + "loss": 0.6033, + "step": 375 + }, + { + "epoch": 0.25, + "learning_rate": 1.98126035202057e-05, + "loss": 0.5583, + "step": 376 + }, + { + "epoch": 0.25, + "learning_rate": 1.9811581329101224e-05, + "loss": 0.578, + "step": 377 + }, + { + "epoch": 0.25, + "learning_rate": 1.981055638421602e-05, + "loss": 0.6008, + "step": 378 + }, + { + "epoch": 0.26, + "learning_rate": 1.980952868583775e-05, + "loss": 0.5858, + "step": 379 + }, + { + "epoch": 0.26, + "learning_rate": 1.980849823425486e-05, + "loss": 0.5801, + "step": 380 + }, + { + "epoch": 0.26, + "learning_rate": 1.980746502975656e-05, + "loss": 0.5443, + "step": 381 + }, + { + "epoch": 0.26, + "learning_rate": 1.9806429072632835e-05, + "loss": 0.5571, + "step": 382 + }, + { + "epoch": 0.26, + "learning_rate": 1.9805390363174447e-05, + "loss": 0.5739, + "step": 383 + }, + { + "epoch": 0.26, + "learning_rate": 1.9804348901672922e-05, + "loss": 0.5362, + "step": 384 + }, + { + "epoch": 0.26, + "learning_rate": 1.9803304688420568e-05, + "loss": 0.529, + "step": 385 + }, + { + "epoch": 0.26, + "learning_rate": 1.980225772371046e-05, + "loss": 0.5601, + "step": 386 + }, + { + "epoch": 0.26, + "learning_rate": 1.9801208007836442e-05, + "loss": 0.5871, + "step": 387 + }, + { + "epoch": 0.26, + "learning_rate": 1.9800155541093135e-05, + "loss": 0.5552, + "step": 388 + }, + { + "epoch": 0.26, + "learning_rate": 1.9799100323775933e-05, + "loss": 0.5537, + "step": 389 + }, + { + "epoch": 0.26, + "learning_rate": 1.9798042356181e-05, + "loss": 0.5929, + "step": 390 + }, + { + "epoch": 0.26, + "learning_rate": 1.979698163860527e-05, + "loss": 0.5586, + "step": 391 + }, + { + "epoch": 0.26, + "learning_rate": 1.9795918171346448e-05, + "loss": 0.5758, + "step": 392 + }, + { + "epoch": 0.26, + "learning_rate": 1.9794851954703024e-05, + "loss": 0.5652, + "step": 393 + }, + { + "epoch": 0.27, + "learning_rate": 1.9793782988974233e-05, + "loss": 0.5536, + "step": 394 + }, + { + "epoch": 0.27, + "learning_rate": 1.9792711274460116e-05, + "loss": 0.5974, + "step": 395 + }, + { + "epoch": 0.27, + "learning_rate": 1.9791636811461454e-05, + "loss": 0.543, + "step": 396 + }, + { + "epoch": 0.27, + "learning_rate": 1.9790559600279816e-05, + "loss": 0.5734, + "step": 397 + }, + { + "epoch": 0.27, + "learning_rate": 1.9789479641217538e-05, + "loss": 0.5894, + "step": 398 + }, + { + "epoch": 0.27, + "learning_rate": 1.9788396934577733e-05, + "loss": 0.587, + "step": 399 + }, + { + "epoch": 0.27, + "learning_rate": 1.978731148066428e-05, + "loss": 0.5684, + "step": 400 + }, + { + "epoch": 0.27, + "learning_rate": 1.9786223279781823e-05, + "loss": 0.5832, + "step": 401 + }, + { + "epoch": 0.27, + "learning_rate": 1.9785132332235787e-05, + "loss": 0.5801, + "step": 402 + }, + { + "epoch": 0.27, + "learning_rate": 1.9784038638332363e-05, + "loss": 0.5815, + "step": 403 + }, + { + "epoch": 0.27, + "learning_rate": 1.9782942198378516e-05, + "loss": 0.5693, + "step": 404 + }, + { + "epoch": 0.27, + "learning_rate": 1.978184301268198e-05, + "loss": 0.57, + "step": 405 + }, + { + "epoch": 0.27, + "learning_rate": 1.978074108155126e-05, + "loss": 0.5642, + "step": 406 + }, + { + "epoch": 0.27, + "learning_rate": 1.9779636405295624e-05, + "loss": 0.5553, + "step": 407 + }, + { + "epoch": 0.27, + "learning_rate": 1.9778528984225126e-05, + "loss": 0.5476, + "step": 408 + }, + { + "epoch": 0.28, + "learning_rate": 1.977741881865058e-05, + "loss": 0.5916, + "step": 409 + }, + { + "epoch": 0.28, + "learning_rate": 1.977630590888357e-05, + "loss": 0.586, + "step": 410 + }, + { + "epoch": 0.28, + "learning_rate": 1.977519025523645e-05, + "loss": 0.5014, + "step": 411 + }, + { + "epoch": 0.28, + "learning_rate": 1.9774071858022354e-05, + "loss": 0.5413, + "step": 412 + }, + { + "epoch": 0.28, + "learning_rate": 1.9772950717555174e-05, + "loss": 0.567, + "step": 413 + }, + { + "epoch": 0.28, + "learning_rate": 1.9771826834149577e-05, + "loss": 0.599, + "step": 414 + }, + { + "epoch": 0.28, + "learning_rate": 1.9770700208120996e-05, + "loss": 0.607, + "step": 415 + }, + { + "epoch": 0.28, + "learning_rate": 1.9769570839785638e-05, + "loss": 0.5516, + "step": 416 + }, + { + "epoch": 0.28, + "learning_rate": 1.9768438729460487e-05, + "loss": 0.5413, + "step": 417 + }, + { + "epoch": 0.28, + "learning_rate": 1.9767303877463275e-05, + "loss": 0.5653, + "step": 418 + }, + { + "epoch": 0.28, + "learning_rate": 1.976616628411253e-05, + "loss": 0.5838, + "step": 419 + }, + { + "epoch": 0.28, + "learning_rate": 1.9765025949727526e-05, + "loss": 0.5575, + "step": 420 + }, + { + "epoch": 0.28, + "learning_rate": 1.9763882874628318e-05, + "loss": 0.581, + "step": 421 + }, + { + "epoch": 0.28, + "learning_rate": 1.9762737059135735e-05, + "loss": 0.5922, + "step": 422 + }, + { + "epoch": 0.28, + "learning_rate": 1.976158850357136e-05, + "loss": 0.5656, + "step": 423 + }, + { + "epoch": 0.29, + "learning_rate": 1.9760437208257564e-05, + "loss": 0.5436, + "step": 424 + }, + { + "epoch": 0.29, + "learning_rate": 1.975928317351747e-05, + "loss": 0.5676, + "step": 425 + }, + { + "epoch": 0.29, + "learning_rate": 1.9758126399674974e-05, + "loss": 0.6124, + "step": 426 + }, + { + "epoch": 0.29, + "learning_rate": 1.975696688705475e-05, + "loss": 0.5773, + "step": 427 + }, + { + "epoch": 0.29, + "learning_rate": 1.9755804635982233e-05, + "loss": 0.5855, + "step": 428 + }, + { + "epoch": 0.29, + "learning_rate": 1.9754639646783627e-05, + "loss": 0.5294, + "step": 429 + }, + { + "epoch": 0.29, + "learning_rate": 1.975347191978591e-05, + "loss": 0.5977, + "step": 430 + }, + { + "epoch": 0.29, + "learning_rate": 1.9752301455316808e-05, + "loss": 0.5552, + "step": 431 + }, + { + "epoch": 0.29, + "learning_rate": 1.9751128253704848e-05, + "loss": 0.5413, + "step": 432 + }, + { + "epoch": 0.29, + "learning_rate": 1.97499523152793e-05, + "loss": 0.5817, + "step": 433 + }, + { + "epoch": 0.29, + "learning_rate": 1.9748773640370212e-05, + "loss": 0.5867, + "step": 434 + }, + { + "epoch": 0.29, + "learning_rate": 1.97475922293084e-05, + "loss": 0.5578, + "step": 435 + }, + { + "epoch": 0.29, + "learning_rate": 1.9746408082425443e-05, + "loss": 0.5483, + "step": 436 + }, + { + "epoch": 0.29, + "learning_rate": 1.9745221200053694e-05, + "loss": 0.5564, + "step": 437 + }, + { + "epoch": 0.29, + "learning_rate": 1.974403158252627e-05, + "loss": 0.5528, + "step": 438 + }, + { + "epoch": 0.3, + "learning_rate": 1.9742839230177056e-05, + "loss": 0.582, + "step": 439 + }, + { + "epoch": 0.3, + "learning_rate": 1.9741644143340707e-05, + "loss": 0.5711, + "step": 440 + }, + { + "epoch": 0.3, + "learning_rate": 1.9740446322352643e-05, + "loss": 0.6, + "step": 441 + }, + { + "epoch": 0.3, + "learning_rate": 1.9739245767549048e-05, + "loss": 0.5799, + "step": 442 + }, + { + "epoch": 0.3, + "learning_rate": 1.9738042479266886e-05, + "loss": 0.5483, + "step": 443 + }, + { + "epoch": 0.3, + "learning_rate": 1.973683645784387e-05, + "loss": 0.5807, + "step": 444 + }, + { + "epoch": 0.3, + "learning_rate": 1.9735627703618494e-05, + "loss": 0.596, + "step": 445 + }, + { + "epoch": 0.3, + "learning_rate": 1.973441621693002e-05, + "loss": 0.6182, + "step": 446 + }, + { + "epoch": 0.3, + "learning_rate": 1.973320199811846e-05, + "loss": 0.5435, + "step": 447 + }, + { + "epoch": 0.3, + "learning_rate": 1.9731985047524612e-05, + "loss": 0.569, + "step": 448 + }, + { + "epoch": 0.3, + "learning_rate": 1.9730765365490033e-05, + "loss": 0.5865, + "step": 449 + }, + { + "epoch": 0.3, + "learning_rate": 1.9729542952357045e-05, + "loss": 0.5882, + "step": 450 + }, + { + "epoch": 0.3, + "learning_rate": 1.972831780846874e-05, + "loss": 0.5532, + "step": 451 + }, + { + "epoch": 0.3, + "learning_rate": 1.972708993416897e-05, + "loss": 0.5525, + "step": 452 + }, + { + "epoch": 0.31, + "learning_rate": 1.9725859329802363e-05, + "loss": 0.6435, + "step": 453 + }, + { + "epoch": 0.31, + "learning_rate": 1.9724625995714307e-05, + "loss": 0.5676, + "step": 454 + }, + { + "epoch": 0.31, + "learning_rate": 1.9723389932250955e-05, + "loss": 0.5593, + "step": 455 + }, + { + "epoch": 0.31, + "learning_rate": 1.9722151139759232e-05, + "loss": 0.5657, + "step": 456 + }, + { + "epoch": 0.31, + "learning_rate": 1.9720909618586824e-05, + "loss": 0.5595, + "step": 457 + }, + { + "epoch": 0.31, + "learning_rate": 1.971966536908218e-05, + "loss": 0.5849, + "step": 458 + }, + { + "epoch": 0.31, + "learning_rate": 1.9718418391594526e-05, + "loss": 0.5468, + "step": 459 + }, + { + "epoch": 0.31, + "learning_rate": 1.9717168686473845e-05, + "loss": 0.6109, + "step": 460 + }, + { + "epoch": 0.31, + "learning_rate": 1.9715916254070883e-05, + "loss": 0.5672, + "step": 461 + }, + { + "epoch": 0.31, + "learning_rate": 1.971466109473716e-05, + "loss": 0.5713, + "step": 462 + }, + { + "epoch": 0.31, + "learning_rate": 1.9713403208824957e-05, + "loss": 0.5531, + "step": 463 + }, + { + "epoch": 0.31, + "learning_rate": 1.9712142596687314e-05, + "loss": 0.6088, + "step": 464 + }, + { + "epoch": 0.31, + "learning_rate": 1.9710879258678045e-05, + "loss": 0.542, + "step": 465 + }, + { + "epoch": 0.31, + "learning_rate": 1.9709613195151736e-05, + "loss": 0.579, + "step": 466 + }, + { + "epoch": 0.31, + "learning_rate": 1.9708344406463714e-05, + "loss": 0.5614, + "step": 467 + }, + { + "epoch": 0.32, + "learning_rate": 1.9707072892970095e-05, + "loss": 0.5719, + "step": 468 + }, + { + "epoch": 0.32, + "learning_rate": 1.9705798655027747e-05, + "loss": 0.592, + "step": 469 + }, + { + "epoch": 0.32, + "learning_rate": 1.9704521692994305e-05, + "loss": 0.5665, + "step": 470 + }, + { + "epoch": 0.32, + "learning_rate": 1.9703242007228172e-05, + "loss": 0.5465, + "step": 471 + }, + { + "epoch": 0.32, + "learning_rate": 1.970195959808851e-05, + "loss": 0.541, + "step": 472 + }, + { + "epoch": 0.32, + "learning_rate": 1.970067446593525e-05, + "loss": 0.5469, + "step": 473 + }, + { + "epoch": 0.32, + "learning_rate": 1.9699386611129082e-05, + "loss": 0.5905, + "step": 474 + }, + { + "epoch": 0.32, + "learning_rate": 1.969809603403147e-05, + "loss": 0.6295, + "step": 475 + }, + { + "epoch": 0.32, + "learning_rate": 1.9696802735004627e-05, + "loss": 0.5631, + "step": 476 + }, + { + "epoch": 0.32, + "learning_rate": 1.9695506714411543e-05, + "loss": 0.5421, + "step": 477 + }, + { + "epoch": 0.32, + "learning_rate": 1.9694207972615967e-05, + "loss": 0.5511, + "step": 478 + }, + { + "epoch": 0.32, + "learning_rate": 1.9692906509982416e-05, + "loss": 0.5908, + "step": 479 + }, + { + "epoch": 0.32, + "learning_rate": 1.969160232687616e-05, + "loss": 0.5496, + "step": 480 + }, + { + "epoch": 0.32, + "learning_rate": 1.9690295423663244e-05, + "loss": 0.5526, + "step": 481 + }, + { + "epoch": 0.32, + "learning_rate": 1.968898580071047e-05, + "loss": 0.592, + "step": 482 + }, + { + "epoch": 0.33, + "learning_rate": 1.9687673458385406e-05, + "loss": 0.5887, + "step": 483 + }, + { + "epoch": 0.33, + "learning_rate": 1.968635839705638e-05, + "loss": 0.5629, + "step": 484 + }, + { + "epoch": 0.33, + "learning_rate": 1.968504061709249e-05, + "loss": 0.5538, + "step": 485 + }, + { + "epoch": 0.33, + "learning_rate": 1.968372011886359e-05, + "loss": 0.5865, + "step": 486 + }, + { + "epoch": 0.33, + "learning_rate": 1.9682396902740294e-05, + "loss": 0.5421, + "step": 487 + }, + { + "epoch": 0.33, + "learning_rate": 1.968107096909399e-05, + "loss": 0.5903, + "step": 488 + }, + { + "epoch": 0.33, + "learning_rate": 1.9679742318296827e-05, + "loss": 0.6114, + "step": 489 + }, + { + "epoch": 0.33, + "learning_rate": 1.96784109507217e-05, + "loss": 0.5873, + "step": 490 + }, + { + "epoch": 0.33, + "learning_rate": 1.9677076866742295e-05, + "loss": 0.5646, + "step": 491 + }, + { + "epoch": 0.33, + "learning_rate": 1.9675740066733035e-05, + "loss": 0.6312, + "step": 492 + }, + { + "epoch": 0.33, + "learning_rate": 1.967440055106911e-05, + "loss": 0.5399, + "step": 493 + }, + { + "epoch": 0.33, + "learning_rate": 1.967305832012649e-05, + "loss": 0.5604, + "step": 494 + }, + { + "epoch": 0.33, + "learning_rate": 1.9671713374281883e-05, + "loss": 0.5732, + "step": 495 + }, + { + "epoch": 0.33, + "learning_rate": 1.9670365713912776e-05, + "loss": 0.5533, + "step": 496 + }, + { + "epoch": 0.33, + "learning_rate": 1.966901533939741e-05, + "loss": 0.5677, + "step": 497 + }, + { + "epoch": 0.34, + "learning_rate": 1.9667662251114793e-05, + "loss": 0.5533, + "step": 498 + }, + { + "epoch": 0.34, + "learning_rate": 1.9666306449444682e-05, + "loss": 0.5834, + "step": 499 + }, + { + "epoch": 0.34, + "learning_rate": 1.9664947934767614e-05, + "loss": 0.587, + "step": 500 + }, + { + "epoch": 0.34, + "learning_rate": 1.9663586707464876e-05, + "loss": 0.5544, + "step": 501 + }, + { + "epoch": 0.34, + "learning_rate": 1.9662222767918518e-05, + "loss": 0.5128, + "step": 502 + }, + { + "epoch": 0.34, + "learning_rate": 1.9660856116511354e-05, + "loss": 0.6291, + "step": 503 + }, + { + "epoch": 0.34, + "learning_rate": 1.9659486753626954e-05, + "loss": 0.6063, + "step": 504 + }, + { + "epoch": 0.34, + "learning_rate": 1.965811467964965e-05, + "loss": 0.5799, + "step": 505 + }, + { + "epoch": 0.34, + "learning_rate": 1.9656739894964544e-05, + "loss": 0.5628, + "step": 506 + }, + { + "epoch": 0.34, + "learning_rate": 1.9655362399957486e-05, + "loss": 0.5554, + "step": 507 + }, + { + "epoch": 0.34, + "learning_rate": 1.9653982195015095e-05, + "loss": 0.5484, + "step": 508 + }, + { + "epoch": 0.34, + "learning_rate": 1.965259928052475e-05, + "loss": 0.5388, + "step": 509 + }, + { + "epoch": 0.34, + "learning_rate": 1.965121365687458e-05, + "loss": 0.575, + "step": 510 + }, + { + "epoch": 0.34, + "learning_rate": 1.964982532445349e-05, + "loss": 0.5393, + "step": 511 + }, + { + "epoch": 0.34, + "learning_rate": 1.9648434283651144e-05, + "loss": 0.5949, + "step": 512 + }, + { + "epoch": 0.35, + "learning_rate": 1.9647040534857948e-05, + "loss": 0.5473, + "step": 513 + }, + { + "epoch": 0.35, + "learning_rate": 1.9645644078465088e-05, + "loss": 0.541, + "step": 514 + }, + { + "epoch": 0.35, + "learning_rate": 1.9644244914864502e-05, + "loss": 0.5781, + "step": 515 + }, + { + "epoch": 0.35, + "learning_rate": 1.9642843044448886e-05, + "loss": 0.642, + "step": 516 + }, + { + "epoch": 0.35, + "learning_rate": 1.9641438467611696e-05, + "loss": 0.5579, + "step": 517 + }, + { + "epoch": 0.35, + "learning_rate": 1.9640031184747152e-05, + "loss": 0.5438, + "step": 518 + }, + { + "epoch": 0.35, + "learning_rate": 1.9638621196250232e-05, + "loss": 0.5216, + "step": 519 + }, + { + "epoch": 0.35, + "learning_rate": 1.9637208502516673e-05, + "loss": 0.5954, + "step": 520 + }, + { + "epoch": 0.35, + "learning_rate": 1.9635793103942964e-05, + "loss": 0.5758, + "step": 521 + }, + { + "epoch": 0.35, + "learning_rate": 1.9634375000926367e-05, + "loss": 0.5904, + "step": 522 + }, + { + "epoch": 0.35, + "learning_rate": 1.9632954193864894e-05, + "loss": 0.5624, + "step": 523 + }, + { + "epoch": 0.35, + "learning_rate": 1.9631530683157316e-05, + "loss": 0.548, + "step": 524 + }, + { + "epoch": 0.35, + "learning_rate": 1.9630104469203165e-05, + "loss": 0.5552, + "step": 525 + }, + { + "epoch": 0.35, + "learning_rate": 1.9628675552402734e-05, + "loss": 0.5761, + "step": 526 + }, + { + "epoch": 0.35, + "learning_rate": 1.962724393315707e-05, + "loss": 0.5475, + "step": 527 + }, + { + "epoch": 0.36, + "learning_rate": 1.9625809611867977e-05, + "loss": 0.5524, + "step": 528 + }, + { + "epoch": 0.36, + "learning_rate": 1.9624372588938025e-05, + "loss": 0.5618, + "step": 529 + }, + { + "epoch": 0.36, + "learning_rate": 1.9622932864770538e-05, + "loss": 0.535, + "step": 530 + }, + { + "epoch": 0.36, + "learning_rate": 1.9621490439769594e-05, + "loss": 0.5327, + "step": 531 + }, + { + "epoch": 0.36, + "learning_rate": 1.9620045314340037e-05, + "loss": 0.5834, + "step": 532 + }, + { + "epoch": 0.36, + "learning_rate": 1.9618597488887462e-05, + "loss": 0.5423, + "step": 533 + }, + { + "epoch": 0.36, + "learning_rate": 1.9617146963818233e-05, + "loss": 0.5831, + "step": 534 + }, + { + "epoch": 0.36, + "learning_rate": 1.9615693739539452e-05, + "loss": 0.5654, + "step": 535 + }, + { + "epoch": 0.36, + "learning_rate": 1.9614237816459e-05, + "loss": 0.5795, + "step": 536 + }, + { + "epoch": 0.36, + "learning_rate": 1.96127791949855e-05, + "loss": 0.5691, + "step": 537 + }, + { + "epoch": 0.36, + "learning_rate": 1.9611317875528338e-05, + "loss": 0.537, + "step": 538 + }, + { + "epoch": 0.36, + "learning_rate": 1.9609853858497655e-05, + "loss": 0.5723, + "step": 539 + }, + { + "epoch": 0.36, + "learning_rate": 1.9608387144304363e-05, + "loss": 0.5443, + "step": 540 + }, + { + "epoch": 0.36, + "learning_rate": 1.9606917733360106e-05, + "loss": 0.5524, + "step": 541 + }, + { + "epoch": 0.36, + "learning_rate": 1.9605445626077305e-05, + "loss": 0.5858, + "step": 542 + }, + { + "epoch": 0.37, + "learning_rate": 1.9603970822869125e-05, + "loss": 0.573, + "step": 543 + }, + { + "epoch": 0.37, + "learning_rate": 1.9602493324149502e-05, + "loss": 0.5726, + "step": 544 + }, + { + "epoch": 0.37, + "learning_rate": 1.960101313033312e-05, + "loss": 0.5576, + "step": 545 + }, + { + "epoch": 0.37, + "learning_rate": 1.9599530241835407e-05, + "loss": 0.5967, + "step": 546 + }, + { + "epoch": 0.37, + "learning_rate": 1.9598044659072573e-05, + "loss": 0.5562, + "step": 547 + }, + { + "epoch": 0.37, + "learning_rate": 1.9596556382461567e-05, + "loss": 0.5736, + "step": 548 + }, + { + "epoch": 0.37, + "learning_rate": 1.9595065412420097e-05, + "loss": 0.5825, + "step": 549 + }, + { + "epoch": 0.37, + "learning_rate": 1.959357174936663e-05, + "loss": 0.5675, + "step": 550 + }, + { + "epoch": 0.37, + "learning_rate": 1.9592075393720385e-05, + "loss": 0.5587, + "step": 551 + }, + { + "epoch": 0.37, + "learning_rate": 1.959057634590134e-05, + "loss": 0.5538, + "step": 552 + }, + { + "epoch": 0.37, + "learning_rate": 1.9589074606330227e-05, + "loss": 0.5589, + "step": 553 + }, + { + "epoch": 0.37, + "learning_rate": 1.9587570175428533e-05, + "loss": 0.5796, + "step": 554 + }, + { + "epoch": 0.37, + "learning_rate": 1.95860630536185e-05, + "loss": 0.5732, + "step": 555 + }, + { + "epoch": 0.37, + "learning_rate": 1.9584553241323133e-05, + "loss": 0.5658, + "step": 556 + }, + { + "epoch": 0.38, + "learning_rate": 1.9583040738966178e-05, + "loss": 0.5565, + "step": 557 + }, + { + "epoch": 0.38, + "learning_rate": 1.958152554697215e-05, + "loss": 0.5791, + "step": 558 + }, + { + "epoch": 0.38, + "learning_rate": 1.9580007665766306e-05, + "loss": 0.6232, + "step": 559 + }, + { + "epoch": 0.38, + "learning_rate": 1.9578487095774666e-05, + "loss": 0.5555, + "step": 560 + }, + { + "epoch": 0.38, + "learning_rate": 1.9576963837424e-05, + "loss": 0.5638, + "step": 561 + }, + { + "epoch": 0.38, + "learning_rate": 1.9575437891141843e-05, + "loss": 0.5563, + "step": 562 + }, + { + "epoch": 0.38, + "learning_rate": 1.9573909257356474e-05, + "loss": 0.5773, + "step": 563 + }, + { + "epoch": 0.38, + "learning_rate": 1.9572377936496926e-05, + "loss": 0.5515, + "step": 564 + }, + { + "epoch": 0.38, + "learning_rate": 1.957084392899299e-05, + "loss": 0.5485, + "step": 565 + }, + { + "epoch": 0.38, + "learning_rate": 1.9569307235275214e-05, + "loss": 0.6222, + "step": 566 + }, + { + "epoch": 0.38, + "learning_rate": 1.9567767855774892e-05, + "loss": 0.5472, + "step": 567 + }, + { + "epoch": 0.38, + "learning_rate": 1.956622579092408e-05, + "loss": 0.6121, + "step": 568 + }, + { + "epoch": 0.38, + "learning_rate": 1.9564681041155576e-05, + "loss": 0.5712, + "step": 569 + }, + { + "epoch": 0.38, + "learning_rate": 1.956313360690295e-05, + "loss": 0.623, + "step": 570 + }, + { + "epoch": 0.38, + "learning_rate": 1.956158348860051e-05, + "loss": 0.5701, + "step": 571 + }, + { + "epoch": 0.39, + "learning_rate": 1.9560030686683316e-05, + "loss": 0.5747, + "step": 572 + }, + { + "epoch": 0.39, + "learning_rate": 1.9558475201587198e-05, + "loss": 0.6337, + "step": 573 + }, + { + "epoch": 0.39, + "learning_rate": 1.955691703374872e-05, + "loss": 0.5592, + "step": 574 + }, + { + "epoch": 0.39, + "learning_rate": 1.955535618360521e-05, + "loss": 0.6027, + "step": 575 + }, + { + "epoch": 0.39, + "learning_rate": 1.955379265159475e-05, + "loss": 0.5717, + "step": 576 + }, + { + "epoch": 0.39, + "learning_rate": 1.955222643815616e-05, + "loss": 0.6055, + "step": 577 + }, + { + "epoch": 0.39, + "learning_rate": 1.9550657543729038e-05, + "loss": 0.5543, + "step": 578 + }, + { + "epoch": 0.39, + "learning_rate": 1.9549085968753707e-05, + "loss": 0.5172, + "step": 579 + }, + { + "epoch": 0.39, + "learning_rate": 1.9547511713671264e-05, + "loss": 0.5725, + "step": 580 + }, + { + "epoch": 0.39, + "learning_rate": 1.9545934778923545e-05, + "loss": 0.5769, + "step": 581 + }, + { + "epoch": 0.39, + "learning_rate": 1.9544355164953143e-05, + "loss": 0.555, + "step": 582 + }, + { + "epoch": 0.39, + "learning_rate": 1.9542772872203404e-05, + "loss": 0.5751, + "step": 583 + }, + { + "epoch": 0.39, + "learning_rate": 1.9541187901118423e-05, + "loss": 0.6036, + "step": 584 + }, + { + "epoch": 0.39, + "learning_rate": 1.953960025214305e-05, + "loss": 0.6035, + "step": 585 + }, + { + "epoch": 0.39, + "learning_rate": 1.953800992572288e-05, + "loss": 0.5255, + "step": 586 + }, + { + "epoch": 0.4, + "learning_rate": 1.953641692230427e-05, + "loss": 0.5611, + "step": 587 + }, + { + "epoch": 0.4, + "learning_rate": 1.953482124233432e-05, + "loss": 0.5736, + "step": 588 + }, + { + "epoch": 0.4, + "learning_rate": 1.953322288626088e-05, + "loss": 0.5818, + "step": 589 + }, + { + "epoch": 0.4, + "learning_rate": 1.9531621854532562e-05, + "loss": 0.5095, + "step": 590 + }, + { + "epoch": 0.4, + "learning_rate": 1.9530018147598717e-05, + "loss": 0.5513, + "step": 591 + }, + { + "epoch": 0.4, + "learning_rate": 1.9528411765909452e-05, + "loss": 0.5914, + "step": 592 + }, + { + "epoch": 0.4, + "learning_rate": 1.9526802709915623e-05, + "loss": 0.5548, + "step": 593 + }, + { + "epoch": 0.4, + "learning_rate": 1.9525190980068843e-05, + "loss": 0.5366, + "step": 594 + }, + { + "epoch": 0.4, + "learning_rate": 1.9523576576821463e-05, + "loss": 0.5367, + "step": 595 + }, + { + "epoch": 0.4, + "learning_rate": 1.95219595006266e-05, + "loss": 0.5409, + "step": 596 + }, + { + "epoch": 0.4, + "learning_rate": 1.9520339751938103e-05, + "loss": 0.5539, + "step": 597 + }, + { + "epoch": 0.4, + "learning_rate": 1.9518717331210594e-05, + "loss": 0.5785, + "step": 598 + }, + { + "epoch": 0.4, + "learning_rate": 1.951709223889942e-05, + "loss": 0.5517, + "step": 599 + }, + { + "epoch": 0.4, + "learning_rate": 1.9515464475460692e-05, + "loss": 0.5972, + "step": 600 + }, + { + "epoch": 0.4, + "learning_rate": 1.9513834041351277e-05, + "loss": 0.5328, + "step": 601 + }, + { + "epoch": 0.41, + "learning_rate": 1.9512200937028767e-05, + "loss": 0.6033, + "step": 602 + }, + { + "epoch": 0.41, + "learning_rate": 1.9510565162951538e-05, + "loss": 0.5525, + "step": 603 + }, + { + "epoch": 0.41, + "learning_rate": 1.9508926719578683e-05, + "loss": 0.5965, + "step": 604 + }, + { + "epoch": 0.41, + "learning_rate": 1.9507285607370065e-05, + "loss": 0.5734, + "step": 605 + }, + { + "epoch": 0.41, + "learning_rate": 1.9505641826786282e-05, + "loss": 0.56, + "step": 606 + }, + { + "epoch": 0.41, + "learning_rate": 1.9503995378288697e-05, + "loss": 0.5585, + "step": 607 + }, + { + "epoch": 0.41, + "learning_rate": 1.950234626233941e-05, + "loss": 0.5904, + "step": 608 + }, + { + "epoch": 0.41, + "learning_rate": 1.9500694479401266e-05, + "loss": 0.5417, + "step": 609 + }, + { + "epoch": 0.41, + "learning_rate": 1.949904002993787e-05, + "loss": 0.5612, + "step": 610 + }, + { + "epoch": 0.41, + "learning_rate": 1.949738291441357e-05, + "loss": 0.5519, + "step": 611 + }, + { + "epoch": 0.41, + "learning_rate": 1.9495723133293465e-05, + "loss": 0.5326, + "step": 612 + }, + { + "epoch": 0.41, + "learning_rate": 1.9494060687043393e-05, + "loss": 0.6025, + "step": 613 + }, + { + "epoch": 0.41, + "learning_rate": 1.9492395576129953e-05, + "loss": 0.5566, + "step": 614 + }, + { + "epoch": 0.41, + "learning_rate": 1.9490727801020485e-05, + "loss": 0.5631, + "step": 615 + }, + { + "epoch": 0.41, + "learning_rate": 1.9489057362183074e-05, + "loss": 0.5973, + "step": 616 + }, + { + "epoch": 0.42, + "learning_rate": 1.9487384260086557e-05, + "loss": 0.5548, + "step": 617 + }, + { + "epoch": 0.42, + "learning_rate": 1.9485708495200517e-05, + "loss": 0.5686, + "step": 618 + }, + { + "epoch": 0.42, + "learning_rate": 1.948403006799529e-05, + "loss": 0.5612, + "step": 619 + }, + { + "epoch": 0.42, + "learning_rate": 1.9482348978941947e-05, + "loss": 0.6014, + "step": 620 + }, + { + "epoch": 0.42, + "learning_rate": 1.9480665228512314e-05, + "loss": 0.5584, + "step": 621 + }, + { + "epoch": 0.42, + "learning_rate": 1.9478978817178967e-05, + "loss": 0.5733, + "step": 622 + }, + { + "epoch": 0.42, + "learning_rate": 1.9477289745415225e-05, + "loss": 0.5554, + "step": 623 + }, + { + "epoch": 0.42, + "learning_rate": 1.947559801369515e-05, + "loss": 0.5889, + "step": 624 + }, + { + "epoch": 0.42, + "learning_rate": 1.9473903622493554e-05, + "loss": 0.517, + "step": 625 + }, + { + "epoch": 0.42, + "learning_rate": 1.9472206572286003e-05, + "loss": 0.5979, + "step": 626 + }, + { + "epoch": 0.42, + "learning_rate": 1.947050686354879e-05, + "loss": 0.5714, + "step": 627 + }, + { + "epoch": 0.42, + "learning_rate": 1.946880449675898e-05, + "loss": 0.5673, + "step": 628 + }, + { + "epoch": 0.42, + "learning_rate": 1.9467099472394357e-05, + "loss": 0.5075, + "step": 629 + }, + { + "epoch": 0.42, + "learning_rate": 1.946539179093347e-05, + "loss": 0.5806, + "step": 630 + }, + { + "epoch": 0.42, + "learning_rate": 1.9463681452855607e-05, + "loss": 0.5948, + "step": 631 + }, + { + "epoch": 0.43, + "learning_rate": 1.9461968458640802e-05, + "loss": 0.5642, + "step": 632 + }, + { + "epoch": 0.43, + "learning_rate": 1.9460252808769836e-05, + "loss": 0.5767, + "step": 633 + }, + { + "epoch": 0.43, + "learning_rate": 1.9458534503724236e-05, + "loss": 0.5629, + "step": 634 + }, + { + "epoch": 0.43, + "learning_rate": 1.945681354398627e-05, + "loss": 0.5585, + "step": 635 + }, + { + "epoch": 0.43, + "learning_rate": 1.945508993003895e-05, + "loss": 0.5811, + "step": 636 + }, + { + "epoch": 0.43, + "learning_rate": 1.945336366236604e-05, + "loss": 0.5724, + "step": 637 + }, + { + "epoch": 0.43, + "learning_rate": 1.9451634741452047e-05, + "loss": 0.5595, + "step": 638 + }, + { + "epoch": 0.43, + "learning_rate": 1.944990316778222e-05, + "loss": 0.5611, + "step": 639 + }, + { + "epoch": 0.43, + "learning_rate": 1.944816894184255e-05, + "loss": 0.5891, + "step": 640 + }, + { + "epoch": 0.43, + "learning_rate": 1.944643206411978e-05, + "loss": 0.5402, + "step": 641 + }, + { + "epoch": 0.43, + "learning_rate": 1.944469253510139e-05, + "loss": 0.5984, + "step": 642 + }, + { + "epoch": 0.43, + "learning_rate": 1.9442950355275613e-05, + "loss": 0.5142, + "step": 643 + }, + { + "epoch": 0.43, + "learning_rate": 1.944120552513141e-05, + "loss": 0.5582, + "step": 644 + }, + { + "epoch": 0.43, + "learning_rate": 1.943945804515851e-05, + "loss": 0.5764, + "step": 645 + }, + { + "epoch": 0.43, + "learning_rate": 1.9437707915847355e-05, + "loss": 0.558, + "step": 646 + }, + { + "epoch": 0.44, + "learning_rate": 1.943595513768916e-05, + "loss": 0.5538, + "step": 647 + }, + { + "epoch": 0.44, + "learning_rate": 1.943419971117587e-05, + "loss": 0.5488, + "step": 648 + }, + { + "epoch": 0.44, + "learning_rate": 1.943244163680017e-05, + "loss": 0.5556, + "step": 649 + }, + { + "epoch": 0.44, + "learning_rate": 1.9430680915055492e-05, + "loss": 0.5655, + "step": 650 + }, + { + "epoch": 0.44, + "learning_rate": 1.9428917546436014e-05, + "loss": 0.5852, + "step": 651 + }, + { + "epoch": 0.44, + "learning_rate": 1.9427151531436652e-05, + "loss": 0.5719, + "step": 652 + }, + { + "epoch": 0.44, + "learning_rate": 1.942538287055307e-05, + "loss": 0.5358, + "step": 653 + }, + { + "epoch": 0.44, + "learning_rate": 1.942361156428167e-05, + "loss": 0.5587, + "step": 654 + }, + { + "epoch": 0.44, + "learning_rate": 1.9421837613119597e-05, + "loss": 0.5437, + "step": 655 + }, + { + "epoch": 0.44, + "learning_rate": 1.9420061017564743e-05, + "loss": 0.5874, + "step": 656 + }, + { + "epoch": 0.44, + "learning_rate": 1.941828177811573e-05, + "loss": 0.5545, + "step": 657 + }, + { + "epoch": 0.44, + "learning_rate": 1.941649989527194e-05, + "loss": 0.5799, + "step": 658 + }, + { + "epoch": 0.44, + "learning_rate": 1.9414715369533485e-05, + "loss": 0.5957, + "step": 659 + }, + { + "epoch": 0.44, + "learning_rate": 1.941292820140122e-05, + "loss": 0.5477, + "step": 660 + }, + { + "epoch": 0.45, + "learning_rate": 1.9411138391376742e-05, + "loss": 0.5512, + "step": 661 + }, + { + "epoch": 0.45, + "learning_rate": 1.9409345939962393e-05, + "loss": 0.541, + "step": 662 + }, + { + "epoch": 0.45, + "learning_rate": 1.9407550847661256e-05, + "loss": 0.5706, + "step": 663 + }, + { + "epoch": 0.45, + "learning_rate": 1.9405753114977145e-05, + "loss": 0.5745, + "step": 664 + }, + { + "epoch": 0.45, + "learning_rate": 1.940395274241463e-05, + "loss": 0.6005, + "step": 665 + }, + { + "epoch": 0.45, + "learning_rate": 1.940214973047901e-05, + "loss": 0.5466, + "step": 666 + }, + { + "epoch": 0.45, + "learning_rate": 1.9400344079676335e-05, + "loss": 0.562, + "step": 667 + }, + { + "epoch": 0.45, + "learning_rate": 1.939853579051339e-05, + "loss": 0.5724, + "step": 668 + }, + { + "epoch": 0.45, + "learning_rate": 1.9396724863497695e-05, + "loss": 0.5713, + "step": 669 + }, + { + "epoch": 0.45, + "learning_rate": 1.9394911299137522e-05, + "loss": 0.6093, + "step": 670 + }, + { + "epoch": 0.45, + "learning_rate": 1.9393095097941873e-05, + "loss": 0.5525, + "step": 671 + }, + { + "epoch": 0.45, + "learning_rate": 1.93912762604205e-05, + "loss": 0.5629, + "step": 672 + }, + { + "epoch": 0.45, + "learning_rate": 1.9389454787083884e-05, + "loss": 0.604, + "step": 673 + }, + { + "epoch": 0.45, + "learning_rate": 1.938763067844326e-05, + "loss": 0.5516, + "step": 674 + }, + { + "epoch": 0.45, + "learning_rate": 1.938580393501058e-05, + "loss": 0.5371, + "step": 675 + }, + { + "epoch": 0.46, + "learning_rate": 1.9383974557298563e-05, + "loss": 0.557, + "step": 676 + }, + { + "epoch": 0.46, + "learning_rate": 1.938214254582065e-05, + "loss": 0.5684, + "step": 677 + }, + { + "epoch": 0.46, + "learning_rate": 1.9380307901091014e-05, + "loss": 0.5569, + "step": 678 + }, + { + "epoch": 0.46, + "learning_rate": 1.9378470623624594e-05, + "loss": 0.551, + "step": 679 + }, + { + "epoch": 0.46, + "learning_rate": 1.9376630713937043e-05, + "loss": 0.6101, + "step": 680 + }, + { + "epoch": 0.46, + "learning_rate": 1.9374788172544765e-05, + "loss": 0.569, + "step": 681 + }, + { + "epoch": 0.46, + "learning_rate": 1.9372942999964895e-05, + "loss": 0.5669, + "step": 682 + }, + { + "epoch": 0.46, + "learning_rate": 1.9371095196715316e-05, + "loss": 0.5921, + "step": 683 + }, + { + "epoch": 0.46, + "learning_rate": 1.9369244763314645e-05, + "loss": 0.5578, + "step": 684 + }, + { + "epoch": 0.46, + "learning_rate": 1.9367391700282228e-05, + "loss": 0.5318, + "step": 685 + }, + { + "epoch": 0.46, + "learning_rate": 1.9365536008138172e-05, + "loss": 0.557, + "step": 686 + }, + { + "epoch": 0.46, + "learning_rate": 1.936367768740329e-05, + "loss": 0.539, + "step": 687 + }, + { + "epoch": 0.46, + "learning_rate": 1.936181673859916e-05, + "loss": 0.6057, + "step": 688 + }, + { + "epoch": 0.46, + "learning_rate": 1.935995316224809e-05, + "loss": 0.5804, + "step": 689 + }, + { + "epoch": 0.46, + "learning_rate": 1.9358086958873116e-05, + "loss": 0.5773, + "step": 690 + }, + { + "epoch": 0.47, + "learning_rate": 1.935621812899802e-05, + "loss": 0.5493, + "step": 691 + }, + { + "epoch": 0.47, + "learning_rate": 1.9354346673147323e-05, + "loss": 0.5728, + "step": 692 + }, + { + "epoch": 0.47, + "learning_rate": 1.9352472591846282e-05, + "loss": 0.5789, + "step": 693 + }, + { + "epoch": 0.47, + "learning_rate": 1.935059588562088e-05, + "loss": 0.5603, + "step": 694 + }, + { + "epoch": 0.47, + "learning_rate": 1.9348716554997854e-05, + "loss": 0.5734, + "step": 695 + }, + { + "epoch": 0.47, + "learning_rate": 1.9346834600504664e-05, + "loss": 0.5711, + "step": 696 + }, + { + "epoch": 0.47, + "learning_rate": 1.9344950022669507e-05, + "loss": 0.5498, + "step": 697 + }, + { + "epoch": 0.47, + "learning_rate": 1.9343062822021332e-05, + "loss": 0.5745, + "step": 698 + }, + { + "epoch": 0.47, + "learning_rate": 1.9341172999089805e-05, + "loss": 0.5516, + "step": 699 + }, + { + "epoch": 0.47, + "learning_rate": 1.9339280554405336e-05, + "loss": 0.5672, + "step": 700 + }, + { + "epoch": 0.47, + "learning_rate": 1.9337385488499074e-05, + "loss": 0.5404, + "step": 701 + }, + { + "epoch": 0.47, + "learning_rate": 1.9335487801902896e-05, + "loss": 0.5741, + "step": 702 + }, + { + "epoch": 0.47, + "learning_rate": 1.9333587495149418e-05, + "loss": 0.5714, + "step": 703 + }, + { + "epoch": 0.47, + "learning_rate": 1.9331684568772e-05, + "loss": 0.5669, + "step": 704 + }, + { + "epoch": 0.47, + "learning_rate": 1.9329779023304724e-05, + "loss": 0.5737, + "step": 705 + }, + { + "epoch": 0.48, + "learning_rate": 1.932787085928241e-05, + "loss": 0.5828, + "step": 706 + }, + { + "epoch": 0.48, + "learning_rate": 1.932596007724062e-05, + "loss": 0.5557, + "step": 707 + }, + { + "epoch": 0.48, + "learning_rate": 1.9324046677715644e-05, + "loss": 0.5757, + "step": 708 + }, + { + "epoch": 0.48, + "learning_rate": 1.9322130661244508e-05, + "loss": 0.6158, + "step": 709 + }, + { + "epoch": 0.48, + "learning_rate": 1.9320212028364976e-05, + "loss": 0.531, + "step": 710 + }, + { + "epoch": 0.48, + "learning_rate": 1.9318290779615545e-05, + "loss": 0.5411, + "step": 711 + }, + { + "epoch": 0.48, + "learning_rate": 1.931636691553544e-05, + "loss": 0.5593, + "step": 712 + }, + { + "epoch": 0.48, + "learning_rate": 1.9314440436664626e-05, + "loss": 0.5658, + "step": 713 + }, + { + "epoch": 0.48, + "learning_rate": 1.9312511343543802e-05, + "loss": 0.5591, + "step": 714 + }, + { + "epoch": 0.48, + "learning_rate": 1.9310579636714402e-05, + "loss": 0.5617, + "step": 715 + }, + { + "epoch": 0.48, + "learning_rate": 1.930864531671859e-05, + "loss": 0.5429, + "step": 716 + }, + { + "epoch": 0.48, + "learning_rate": 1.9306708384099258e-05, + "loss": 0.5369, + "step": 717 + }, + { + "epoch": 0.48, + "learning_rate": 1.9304768839400046e-05, + "loss": 0.5455, + "step": 718 + }, + { + "epoch": 0.48, + "learning_rate": 1.9302826683165315e-05, + "loss": 0.5672, + "step": 719 + }, + { + "epoch": 0.48, + "learning_rate": 1.9300881915940163e-05, + "loss": 0.5997, + "step": 720 + }, + { + "epoch": 0.49, + "learning_rate": 1.9298934538270423e-05, + "loss": 0.5621, + "step": 721 + }, + { + "epoch": 0.49, + "learning_rate": 1.9296984550702656e-05, + "loss": 0.5844, + "step": 722 + }, + { + "epoch": 0.49, + "learning_rate": 1.929503195378416e-05, + "loss": 0.6057, + "step": 723 + }, + { + "epoch": 0.49, + "learning_rate": 1.929307674806296e-05, + "loss": 0.5534, + "step": 724 + }, + { + "epoch": 0.49, + "learning_rate": 1.929111893408782e-05, + "loss": 0.5135, + "step": 725 + }, + { + "epoch": 0.49, + "learning_rate": 1.928915851240823e-05, + "loss": 0.5553, + "step": 726 + }, + { + "epoch": 0.49, + "learning_rate": 1.928719548357442e-05, + "loss": 0.5783, + "step": 727 + }, + { + "epoch": 0.49, + "learning_rate": 1.9285229848137335e-05, + "loss": 0.5537, + "step": 728 + }, + { + "epoch": 0.49, + "learning_rate": 1.9283261606648672e-05, + "loss": 0.5447, + "step": 729 + }, + { + "epoch": 0.49, + "learning_rate": 1.928129075966085e-05, + "loss": 0.5886, + "step": 730 + }, + { + "epoch": 0.49, + "learning_rate": 1.9279317307727012e-05, + "loss": 0.5832, + "step": 731 + }, + { + "epoch": 0.49, + "learning_rate": 1.9277341251401047e-05, + "loss": 0.5591, + "step": 732 + }, + { + "epoch": 0.49, + "learning_rate": 1.9275362591237564e-05, + "loss": 0.5592, + "step": 733 + }, + { + "epoch": 0.49, + "learning_rate": 1.927338132779191e-05, + "loss": 0.536, + "step": 734 + }, + { + "epoch": 0.49, + "learning_rate": 1.9271397461620154e-05, + "loss": 0.5591, + "step": 735 + }, + { + "epoch": 0.5, + "learning_rate": 1.9269410993279104e-05, + "loss": 0.5325, + "step": 736 + }, + { + "epoch": 0.5, + "learning_rate": 1.926742192332629e-05, + "loss": 0.5783, + "step": 737 + }, + { + "epoch": 0.5, + "learning_rate": 1.9265430252319984e-05, + "loss": 0.5576, + "step": 738 + }, + { + "epoch": 0.5, + "learning_rate": 1.9263435980819177e-05, + "loss": 0.6244, + "step": 739 + }, + { + "epoch": 0.5, + "learning_rate": 1.9261439109383594e-05, + "loss": 0.5723, + "step": 740 + }, + { + "epoch": 0.5, + "learning_rate": 1.9259439638573688e-05, + "loss": 0.5856, + "step": 741 + }, + { + "epoch": 0.5, + "learning_rate": 1.925743756895065e-05, + "loss": 0.5135, + "step": 742 + }, + { + "epoch": 0.5, + "learning_rate": 1.9255432901076386e-05, + "loss": 0.5601, + "step": 743 + }, + { + "epoch": 0.5, + "learning_rate": 1.9253425635513543e-05, + "loss": 0.5737, + "step": 744 + }, + { + "epoch": 0.5, + "learning_rate": 1.925141577282549e-05, + "loss": 0.5594, + "step": 745 + }, + { + "epoch": 0.5, + "learning_rate": 1.9249403313576328e-05, + "loss": 0.5622, + "step": 746 + }, + { + "epoch": 0.5, + "learning_rate": 1.9247388258330892e-05, + "loss": 0.5669, + "step": 747 + }, + { + "epoch": 0.5, + "learning_rate": 1.9245370607654733e-05, + "loss": 0.5544, + "step": 748 + }, + { + "epoch": 0.5, + "learning_rate": 1.9243350362114143e-05, + "loss": 0.5297, + "step": 749 + }, + { + "epoch": 0.51, + "learning_rate": 1.9241327522276133e-05, + "loss": 0.5723, + "step": 750 + }, + { + "epoch": 0.51, + "learning_rate": 1.9239302088708452e-05, + "loss": 0.5341, + "step": 751 + }, + { + "epoch": 0.51, + "learning_rate": 1.9237274061979564e-05, + "loss": 0.6035, + "step": 752 + }, + { + "epoch": 0.51, + "learning_rate": 1.923524344265867e-05, + "loss": 0.5262, + "step": 753 + }, + { + "epoch": 0.51, + "learning_rate": 1.9233210231315697e-05, + "loss": 0.5501, + "step": 754 + }, + { + "epoch": 0.51, + "learning_rate": 1.92311744285213e-05, + "loss": 0.5783, + "step": 755 + }, + { + "epoch": 0.51, + "learning_rate": 1.9229136034846858e-05, + "loss": 0.5909, + "step": 756 + }, + { + "epoch": 0.51, + "learning_rate": 1.9227095050864485e-05, + "loss": 0.5896, + "step": 757 + }, + { + "epoch": 0.51, + "learning_rate": 1.922505147714701e-05, + "loss": 0.5653, + "step": 758 + }, + { + "epoch": 0.51, + "learning_rate": 1.9223005314268e-05, + "loss": 0.5263, + "step": 759 + }, + { + "epoch": 0.51, + "learning_rate": 1.922095656280174e-05, + "loss": 0.5538, + "step": 760 + }, + { + "epoch": 0.51, + "learning_rate": 1.921890522332325e-05, + "loss": 0.5532, + "step": 761 + }, + { + "epoch": 0.51, + "learning_rate": 1.921685129640827e-05, + "loss": 0.5551, + "step": 762 + }, + { + "epoch": 0.51, + "learning_rate": 1.9214794782633264e-05, + "loss": 0.5391, + "step": 763 + }, + { + "epoch": 0.51, + "learning_rate": 1.921273568257543e-05, + "loss": 0.5658, + "step": 764 + }, + { + "epoch": 0.52, + "learning_rate": 1.9210673996812694e-05, + "loss": 0.5353, + "step": 765 + }, + { + "epoch": 0.52, + "learning_rate": 1.920860972592369e-05, + "loss": 0.549, + "step": 766 + }, + { + "epoch": 0.52, + "learning_rate": 1.92065428704878e-05, + "loss": 0.5588, + "step": 767 + }, + { + "epoch": 0.52, + "learning_rate": 1.9204473431085114e-05, + "loss": 0.5849, + "step": 768 + }, + { + "epoch": 0.52, + "learning_rate": 1.9202401408296457e-05, + "loss": 0.5869, + "step": 769 + }, + { + "epoch": 0.52, + "learning_rate": 1.9200326802703374e-05, + "loss": 0.5734, + "step": 770 + }, + { + "epoch": 0.52, + "learning_rate": 1.9198249614888138e-05, + "loss": 0.5652, + "step": 771 + }, + { + "epoch": 0.52, + "learning_rate": 1.919616984543375e-05, + "loss": 0.6093, + "step": 772 + }, + { + "epoch": 0.52, + "learning_rate": 1.919408749492392e-05, + "loss": 0.562, + "step": 773 + }, + { + "epoch": 0.52, + "learning_rate": 1.919200256394311e-05, + "loss": 0.5512, + "step": 774 + }, + { + "epoch": 0.52, + "learning_rate": 1.9189915053076472e-05, + "loss": 0.5685, + "step": 775 + }, + { + "epoch": 0.52, + "learning_rate": 1.9187824962909912e-05, + "loss": 0.513, + "step": 776 + }, + { + "epoch": 0.52, + "learning_rate": 1.918573229403005e-05, + "loss": 0.5912, + "step": 777 + }, + { + "epoch": 0.52, + "learning_rate": 1.9183637047024218e-05, + "loss": 0.5767, + "step": 778 + }, + { + "epoch": 0.52, + "learning_rate": 1.9181539222480484e-05, + "loss": 0.5945, + "step": 779 + }, + { + "epoch": 0.53, + "learning_rate": 1.9179438820987645e-05, + "loss": 0.5604, + "step": 780 + }, + { + "epoch": 0.53, + "learning_rate": 1.9177335843135202e-05, + "loss": 0.5295, + "step": 781 + }, + { + "epoch": 0.53, + "learning_rate": 1.91752302895134e-05, + "loss": 0.5536, + "step": 782 + }, + { + "epoch": 0.53, + "learning_rate": 1.9173122160713187e-05, + "loss": 0.549, + "step": 783 + }, + { + "epoch": 0.53, + "learning_rate": 1.917101145732625e-05, + "loss": 0.5432, + "step": 784 + }, + { + "epoch": 0.53, + "learning_rate": 1.9168898179944994e-05, + "loss": 0.5829, + "step": 785 + }, + { + "epoch": 0.53, + "learning_rate": 1.9166782329162538e-05, + "loss": 0.5574, + "step": 786 + }, + { + "epoch": 0.53, + "learning_rate": 1.9164663905572734e-05, + "loss": 0.5803, + "step": 787 + }, + { + "epoch": 0.53, + "learning_rate": 1.9162542909770153e-05, + "loss": 0.5806, + "step": 788 + }, + { + "epoch": 0.53, + "learning_rate": 1.916041934235009e-05, + "loss": 0.5486, + "step": 789 + }, + { + "epoch": 0.53, + "learning_rate": 1.9158293203908552e-05, + "loss": 0.5411, + "step": 790 + }, + { + "epoch": 0.53, + "learning_rate": 1.9156164495042283e-05, + "loss": 0.5524, + "step": 791 + }, + { + "epoch": 0.53, + "learning_rate": 1.9154033216348732e-05, + "loss": 0.5782, + "step": 792 + }, + { + "epoch": 0.53, + "learning_rate": 1.915189936842608e-05, + "loss": 0.5995, + "step": 793 + }, + { + "epoch": 0.53, + "learning_rate": 1.9149762951873224e-05, + "loss": 0.5672, + "step": 794 + }, + { + "epoch": 0.54, + "learning_rate": 1.914762396728979e-05, + "loss": 0.5396, + "step": 795 + }, + { + "epoch": 0.54, + "learning_rate": 1.9145482415276113e-05, + "loss": 0.5665, + "step": 796 + }, + { + "epoch": 0.54, + "learning_rate": 1.9143338296433262e-05, + "loss": 0.5891, + "step": 797 + }, + { + "epoch": 0.54, + "learning_rate": 1.914119161136301e-05, + "loss": 0.5535, + "step": 798 + }, + { + "epoch": 0.54, + "learning_rate": 1.9139042360667865e-05, + "loss": 0.5399, + "step": 799 + }, + { + "epoch": 0.54, + "learning_rate": 1.9136890544951046e-05, + "loss": 0.555, + "step": 800 + }, + { + "epoch": 0.54, + "learning_rate": 1.91347361648165e-05, + "loss": 0.5976, + "step": 801 + }, + { + "epoch": 0.54, + "learning_rate": 1.913257922086889e-05, + "loss": 0.5292, + "step": 802 + }, + { + "epoch": 0.54, + "learning_rate": 1.9130419713713588e-05, + "loss": 0.5547, + "step": 803 + }, + { + "epoch": 0.54, + "learning_rate": 1.9128257643956703e-05, + "loss": 0.5403, + "step": 804 + }, + { + "epoch": 0.54, + "learning_rate": 1.912609301220505e-05, + "loss": 0.5898, + "step": 805 + }, + { + "epoch": 0.54, + "learning_rate": 1.9123925819066172e-05, + "loss": 0.5698, + "step": 806 + }, + { + "epoch": 0.54, + "learning_rate": 1.9121756065148333e-05, + "loss": 0.6113, + "step": 807 + }, + { + "epoch": 0.54, + "learning_rate": 1.9119583751060495e-05, + "loss": 0.5581, + "step": 808 + }, + { + "epoch": 0.54, + "learning_rate": 1.9117408877412366e-05, + "loss": 0.5537, + "step": 809 + }, + { + "epoch": 0.55, + "learning_rate": 1.9115231444814356e-05, + "loss": 0.5646, + "step": 810 + }, + { + "epoch": 0.55, + "learning_rate": 1.9113051453877595e-05, + "loss": 0.5092, + "step": 811 + }, + { + "epoch": 0.55, + "learning_rate": 1.911086890521394e-05, + "loss": 0.5775, + "step": 812 + }, + { + "epoch": 0.55, + "learning_rate": 1.910868379943595e-05, + "loss": 0.5368, + "step": 813 + }, + { + "epoch": 0.55, + "learning_rate": 1.910649613715691e-05, + "loss": 0.5578, + "step": 814 + }, + { + "epoch": 0.55, + "learning_rate": 1.9104305918990832e-05, + "loss": 0.5948, + "step": 815 + }, + { + "epoch": 0.55, + "learning_rate": 1.9102113145552434e-05, + "loss": 0.5245, + "step": 816 + }, + { + "epoch": 0.55, + "learning_rate": 1.9099917817457152e-05, + "loss": 0.5549, + "step": 817 + }, + { + "epoch": 0.55, + "learning_rate": 1.9097719935321137e-05, + "loss": 0.5862, + "step": 818 + }, + { + "epoch": 0.55, + "learning_rate": 1.909551949976127e-05, + "loss": 0.5024, + "step": 819 + }, + { + "epoch": 0.55, + "learning_rate": 1.9093316511395128e-05, + "loss": 0.5631, + "step": 820 + }, + { + "epoch": 0.55, + "learning_rate": 1.9091110970841024e-05, + "loss": 0.5404, + "step": 821 + }, + { + "epoch": 0.55, + "learning_rate": 1.9088902878717978e-05, + "loss": 0.5944, + "step": 822 + }, + { + "epoch": 0.55, + "learning_rate": 1.908669223564572e-05, + "loss": 0.5525, + "step": 823 + }, + { + "epoch": 0.55, + "learning_rate": 1.9084479042244714e-05, + "loss": 0.6176, + "step": 824 + }, + { + "epoch": 0.56, + "learning_rate": 1.908226329913612e-05, + "loss": 0.5513, + "step": 825 + }, + { + "epoch": 0.56, + "learning_rate": 1.9080045006941828e-05, + "loss": 0.5757, + "step": 826 + }, + { + "epoch": 0.56, + "learning_rate": 1.9077824166284434e-05, + "loss": 0.579, + "step": 827 + }, + { + "epoch": 0.56, + "learning_rate": 1.9075600777787256e-05, + "loss": 0.5777, + "step": 828 + }, + { + "epoch": 0.56, + "learning_rate": 1.9073374842074323e-05, + "loss": 0.5759, + "step": 829 + }, + { + "epoch": 0.56, + "learning_rate": 1.9071146359770384e-05, + "loss": 0.5795, + "step": 830 + }, + { + "epoch": 0.56, + "learning_rate": 1.906891533150089e-05, + "loss": 0.5528, + "step": 831 + }, + { + "epoch": 0.56, + "learning_rate": 1.9066681757892026e-05, + "loss": 0.5879, + "step": 832 + }, + { + "epoch": 0.56, + "learning_rate": 1.9064445639570675e-05, + "loss": 0.6238, + "step": 833 + }, + { + "epoch": 0.56, + "learning_rate": 1.906220697716444e-05, + "loss": 0.5294, + "step": 834 + }, + { + "epoch": 0.56, + "learning_rate": 1.9059965771301644e-05, + "loss": 0.5764, + "step": 835 + }, + { + "epoch": 0.56, + "learning_rate": 1.905772202261131e-05, + "loss": 0.5574, + "step": 836 + }, + { + "epoch": 0.56, + "learning_rate": 1.9055475731723187e-05, + "loss": 0.5675, + "step": 837 + }, + { + "epoch": 0.56, + "learning_rate": 1.905322689926773e-05, + "loss": 0.5536, + "step": 838 + }, + { + "epoch": 0.56, + "learning_rate": 1.905097552587612e-05, + "loss": 0.5418, + "step": 839 + }, + { + "epoch": 0.57, + "learning_rate": 1.9048721612180232e-05, + "loss": 0.5389, + "step": 840 + }, + { + "epoch": 0.57, + "learning_rate": 1.9046465158812666e-05, + "loss": 0.5604, + "step": 841 + }, + { + "epoch": 0.57, + "learning_rate": 1.9044206166406734e-05, + "loss": 0.5755, + "step": 842 + }, + { + "epoch": 0.57, + "learning_rate": 1.904194463559646e-05, + "loss": 0.5955, + "step": 843 + }, + { + "epoch": 0.57, + "learning_rate": 1.9039680567016576e-05, + "loss": 0.5437, + "step": 844 + }, + { + "epoch": 0.57, + "learning_rate": 1.9037413961302534e-05, + "loss": 0.5514, + "step": 845 + }, + { + "epoch": 0.57, + "learning_rate": 1.903514481909049e-05, + "loss": 0.544, + "step": 846 + }, + { + "epoch": 0.57, + "learning_rate": 1.9032873141017316e-05, + "loss": 0.5387, + "step": 847 + }, + { + "epoch": 0.57, + "learning_rate": 1.9030598927720603e-05, + "loss": 0.5393, + "step": 848 + }, + { + "epoch": 0.57, + "learning_rate": 1.9028322179838636e-05, + "loss": 0.518, + "step": 849 + }, + { + "epoch": 0.57, + "learning_rate": 1.9026042898010428e-05, + "loss": 0.5585, + "step": 850 + }, + { + "epoch": 0.57, + "learning_rate": 1.9023761082875695e-05, + "loss": 0.6034, + "step": 851 + }, + { + "epoch": 0.57, + "learning_rate": 1.9021476735074865e-05, + "loss": 0.5387, + "step": 852 + }, + { + "epoch": 0.57, + "learning_rate": 1.9019189855249078e-05, + "loss": 0.5533, + "step": 853 + }, + { + "epoch": 0.58, + "learning_rate": 1.9016900444040184e-05, + "loss": 0.5328, + "step": 854 + }, + { + "epoch": 0.58, + "learning_rate": 1.9014608502090744e-05, + "loss": 0.5882, + "step": 855 + }, + { + "epoch": 0.58, + "learning_rate": 1.901231403004403e-05, + "loss": 0.5415, + "step": 856 + }, + { + "epoch": 0.58, + "learning_rate": 1.9010017028544023e-05, + "loss": 0.6163, + "step": 857 + }, + { + "epoch": 0.58, + "learning_rate": 1.9007717498235412e-05, + "loss": 0.5451, + "step": 858 + }, + { + "epoch": 0.58, + "learning_rate": 1.90054154397636e-05, + "loss": 0.5427, + "step": 859 + }, + { + "epoch": 0.58, + "learning_rate": 1.9003110853774694e-05, + "loss": 0.6107, + "step": 860 + }, + { + "epoch": 0.58, + "learning_rate": 1.9000803740915515e-05, + "loss": 0.5687, + "step": 861 + }, + { + "epoch": 0.58, + "learning_rate": 1.89984941018336e-05, + "loss": 0.5573, + "step": 862 + }, + { + "epoch": 0.58, + "learning_rate": 1.8996181937177173e-05, + "loss": 0.5551, + "step": 863 + }, + { + "epoch": 0.58, + "learning_rate": 1.899386724759519e-05, + "loss": 0.5538, + "step": 864 + }, + { + "epoch": 0.58, + "learning_rate": 1.89915500337373e-05, + "loss": 0.5824, + "step": 865 + }, + { + "epoch": 0.58, + "learning_rate": 1.898923029625388e-05, + "loss": 0.6187, + "step": 866 + }, + { + "epoch": 0.58, + "learning_rate": 1.8986908035795986e-05, + "loss": 0.6308, + "step": 867 + }, + { + "epoch": 0.58, + "learning_rate": 1.898458325301541e-05, + "loss": 0.5597, + "step": 868 + }, + { + "epoch": 0.59, + "learning_rate": 1.8982255948564635e-05, + "loss": 0.5874, + "step": 869 + }, + { + "epoch": 0.59, + "learning_rate": 1.8979926123096858e-05, + "loss": 0.5634, + "step": 870 + }, + { + "epoch": 0.59, + "learning_rate": 1.8977593777265978e-05, + "loss": 0.5411, + "step": 871 + }, + { + "epoch": 0.59, + "learning_rate": 1.8975258911726616e-05, + "loss": 0.5591, + "step": 872 + }, + { + "epoch": 0.59, + "learning_rate": 1.8972921527134085e-05, + "loss": 0.5356, + "step": 873 + }, + { + "epoch": 0.59, + "learning_rate": 1.8970581624144412e-05, + "loss": 0.5493, + "step": 874 + }, + { + "epoch": 0.59, + "learning_rate": 1.896823920341432e-05, + "loss": 0.5647, + "step": 875 + }, + { + "epoch": 0.59, + "learning_rate": 1.8965894265601263e-05, + "loss": 0.6083, + "step": 876 + }, + { + "epoch": 0.59, + "learning_rate": 1.8963546811363375e-05, + "loss": 0.5754, + "step": 877 + }, + { + "epoch": 0.59, + "learning_rate": 1.8961196841359507e-05, + "loss": 0.5571, + "step": 878 + }, + { + "epoch": 0.59, + "learning_rate": 1.8958844356249223e-05, + "loss": 0.5892, + "step": 879 + }, + { + "epoch": 0.59, + "learning_rate": 1.895648935669278e-05, + "loss": 0.5377, + "step": 880 + }, + { + "epoch": 0.59, + "learning_rate": 1.895413184335116e-05, + "loss": 0.5311, + "step": 881 + }, + { + "epoch": 0.59, + "learning_rate": 1.895177181688602e-05, + "loss": 0.524, + "step": 882 + }, + { + "epoch": 0.59, + "learning_rate": 1.894940927795975e-05, + "loss": 0.5935, + "step": 883 + }, + { + "epoch": 0.6, + "learning_rate": 1.894704422723543e-05, + "loss": 0.5717, + "step": 884 + }, + { + "epoch": 0.6, + "learning_rate": 1.8944676665376858e-05, + "loss": 0.5819, + "step": 885 + }, + { + "epoch": 0.6, + "learning_rate": 1.8942306593048522e-05, + "loss": 0.5643, + "step": 886 + }, + { + "epoch": 0.6, + "learning_rate": 1.8939934010915627e-05, + "loss": 0.5349, + "step": 887 + }, + { + "epoch": 0.6, + "learning_rate": 1.893755891964407e-05, + "loss": 0.5445, + "step": 888 + }, + { + "epoch": 0.6, + "learning_rate": 1.8935181319900467e-05, + "loss": 0.526, + "step": 889 + }, + { + "epoch": 0.6, + "learning_rate": 1.8932801212352124e-05, + "loss": 0.5165, + "step": 890 + }, + { + "epoch": 0.6, + "learning_rate": 1.893041859766706e-05, + "loss": 0.5332, + "step": 891 + }, + { + "epoch": 0.6, + "learning_rate": 1.8928033476513996e-05, + "loss": 0.5564, + "step": 892 + }, + { + "epoch": 0.6, + "learning_rate": 1.892564584956235e-05, + "loss": 0.5446, + "step": 893 + }, + { + "epoch": 0.6, + "learning_rate": 1.8923255717482256e-05, + "loss": 0.5496, + "step": 894 + }, + { + "epoch": 0.6, + "learning_rate": 1.8920863080944534e-05, + "loss": 0.5787, + "step": 895 + }, + { + "epoch": 0.6, + "learning_rate": 1.8918467940620728e-05, + "loss": 0.5992, + "step": 896 + }, + { + "epoch": 0.6, + "learning_rate": 1.8916070297183066e-05, + "loss": 0.5864, + "step": 897 + }, + { + "epoch": 0.6, + "learning_rate": 1.891367015130449e-05, + "loss": 0.5299, + "step": 898 + }, + { + "epoch": 0.61, + "learning_rate": 1.891126750365863e-05, + "loss": 0.5238, + "step": 899 + }, + { + "epoch": 0.61, + "learning_rate": 1.8908862354919843e-05, + "loss": 0.5545, + "step": 900 + }, + { + "epoch": 0.61, + "learning_rate": 1.8906454705763163e-05, + "loss": 0.5887, + "step": 901 + }, + { + "epoch": 0.61, + "learning_rate": 1.8904044556864343e-05, + "loss": 0.5461, + "step": 902 + }, + { + "epoch": 0.61, + "learning_rate": 1.8901631908899824e-05, + "loss": 0.5636, + "step": 903 + }, + { + "epoch": 0.61, + "learning_rate": 1.8899216762546758e-05, + "loss": 0.5595, + "step": 904 + }, + { + "epoch": 0.61, + "learning_rate": 1.8896799118482995e-05, + "loss": 0.5476, + "step": 905 + }, + { + "epoch": 0.61, + "learning_rate": 1.889437897738709e-05, + "loss": 0.5804, + "step": 906 + }, + { + "epoch": 0.61, + "learning_rate": 1.8891956339938287e-05, + "loss": 0.5566, + "step": 907 + }, + { + "epoch": 0.61, + "learning_rate": 1.8889531206816546e-05, + "loss": 0.5321, + "step": 908 + }, + { + "epoch": 0.61, + "learning_rate": 1.888710357870252e-05, + "loss": 0.5781, + "step": 909 + }, + { + "epoch": 0.61, + "learning_rate": 1.888467345627756e-05, + "loss": 0.5637, + "step": 910 + }, + { + "epoch": 0.61, + "learning_rate": 1.888224084022372e-05, + "loss": 0.5695, + "step": 911 + }, + { + "epoch": 0.61, + "learning_rate": 1.8879805731223752e-05, + "loss": 0.5982, + "step": 912 + }, + { + "epoch": 0.61, + "learning_rate": 1.8877368129961115e-05, + "loss": 0.5444, + "step": 913 + }, + { + "epoch": 0.62, + "learning_rate": 1.8874928037119953e-05, + "loss": 0.5021, + "step": 914 + }, + { + "epoch": 0.62, + "learning_rate": 1.8872485453385124e-05, + "loss": 0.5272, + "step": 915 + }, + { + "epoch": 0.62, + "learning_rate": 1.8870040379442175e-05, + "loss": 0.5637, + "step": 916 + }, + { + "epoch": 0.62, + "learning_rate": 1.8867592815977362e-05, + "loss": 0.5697, + "step": 917 + }, + { + "epoch": 0.62, + "learning_rate": 1.886514276367763e-05, + "loss": 0.5661, + "step": 918 + }, + { + "epoch": 0.62, + "learning_rate": 1.8862690223230625e-05, + "loss": 0.5432, + "step": 919 + }, + { + "epoch": 0.62, + "learning_rate": 1.8860235195324695e-05, + "loss": 0.5486, + "step": 920 + }, + { + "epoch": 0.62, + "learning_rate": 1.8857777680648884e-05, + "loss": 0.5475, + "step": 921 + }, + { + "epoch": 0.62, + "learning_rate": 1.885531767989293e-05, + "loss": 0.5502, + "step": 922 + }, + { + "epoch": 0.62, + "learning_rate": 1.8852855193747274e-05, + "loss": 0.536, + "step": 923 + }, + { + "epoch": 0.62, + "learning_rate": 1.8850390222903057e-05, + "loss": 0.5224, + "step": 924 + }, + { + "epoch": 0.62, + "learning_rate": 1.8847922768052105e-05, + "loss": 0.5563, + "step": 925 + }, + { + "epoch": 0.62, + "learning_rate": 1.8845452829886957e-05, + "loss": 0.5282, + "step": 926 + }, + { + "epoch": 0.62, + "learning_rate": 1.8842980409100845e-05, + "loss": 0.5655, + "step": 927 + }, + { + "epoch": 0.62, + "learning_rate": 1.8840505506387684e-05, + "loss": 0.5653, + "step": 928 + }, + { + "epoch": 0.63, + "learning_rate": 1.88380281224421e-05, + "loss": 0.5747, + "step": 929 + }, + { + "epoch": 0.63, + "learning_rate": 1.8835548257959413e-05, + "loss": 0.5567, + "step": 930 + }, + { + "epoch": 0.63, + "learning_rate": 1.8833065913635634e-05, + "loss": 0.5576, + "step": 931 + }, + { + "epoch": 0.63, + "learning_rate": 1.8830581090167477e-05, + "loss": 0.5928, + "step": 932 + }, + { + "epoch": 0.63, + "learning_rate": 1.8828093788252348e-05, + "loss": 0.6024, + "step": 933 + }, + { + "epoch": 0.63, + "learning_rate": 1.882560400858835e-05, + "loss": 0.5754, + "step": 934 + }, + { + "epoch": 0.63, + "learning_rate": 1.8823111751874277e-05, + "loss": 0.6022, + "step": 935 + }, + { + "epoch": 0.63, + "learning_rate": 1.882061701880962e-05, + "loss": 0.5968, + "step": 936 + }, + { + "epoch": 0.63, + "learning_rate": 1.8818119810094576e-05, + "loss": 0.5366, + "step": 937 + }, + { + "epoch": 0.63, + "learning_rate": 1.8815620126430016e-05, + "loss": 0.5694, + "step": 938 + }, + { + "epoch": 0.63, + "learning_rate": 1.8813117968517524e-05, + "loss": 0.5477, + "step": 939 + }, + { + "epoch": 0.63, + "learning_rate": 1.881061333705937e-05, + "loss": 0.5917, + "step": 940 + }, + { + "epoch": 0.63, + "learning_rate": 1.880810623275852e-05, + "loss": 0.5322, + "step": 941 + }, + { + "epoch": 0.63, + "learning_rate": 1.8805596656318635e-05, + "loss": 0.5812, + "step": 942 + }, + { + "epoch": 0.63, + "learning_rate": 1.8803084608444063e-05, + "loss": 0.5655, + "step": 943 + }, + { + "epoch": 0.64, + "learning_rate": 1.880057008983986e-05, + "loss": 0.541, + "step": 944 + }, + { + "epoch": 0.64, + "learning_rate": 1.879805310121176e-05, + "loss": 0.5597, + "step": 945 + }, + { + "epoch": 0.64, + "learning_rate": 1.87955336432662e-05, + "loss": 0.5374, + "step": 946 + }, + { + "epoch": 0.64, + "learning_rate": 1.8793011716710302e-05, + "loss": 0.5676, + "step": 947 + }, + { + "epoch": 0.64, + "learning_rate": 1.8790487322251896e-05, + "loss": 0.5439, + "step": 948 + }, + { + "epoch": 0.64, + "learning_rate": 1.8787960460599482e-05, + "loss": 0.5994, + "step": 949 + }, + { + "epoch": 0.64, + "learning_rate": 1.8785431132462278e-05, + "loss": 0.531, + "step": 950 + }, + { + "epoch": 0.64, + "learning_rate": 1.8782899338550175e-05, + "loss": 0.5691, + "step": 951 + }, + { + "epoch": 0.64, + "learning_rate": 1.878036507957376e-05, + "loss": 0.5385, + "step": 952 + }, + { + "epoch": 0.64, + "learning_rate": 1.8777828356244316e-05, + "loss": 0.5474, + "step": 953 + }, + { + "epoch": 0.64, + "learning_rate": 1.877528916927382e-05, + "loss": 0.5952, + "step": 954 + }, + { + "epoch": 0.64, + "learning_rate": 1.8772747519374927e-05, + "loss": 0.5825, + "step": 955 + }, + { + "epoch": 0.64, + "learning_rate": 1.8770203407261004e-05, + "loss": 0.5566, + "step": 956 + }, + { + "epoch": 0.64, + "learning_rate": 1.876765683364609e-05, + "loss": 0.5683, + "step": 957 + }, + { + "epoch": 0.65, + "learning_rate": 1.8765107799244927e-05, + "loss": 0.5128, + "step": 958 + }, + { + "epoch": 0.65, + "learning_rate": 1.8762556304772936e-05, + "loss": 0.5252, + "step": 959 + }, + { + "epoch": 0.65, + "learning_rate": 1.8760002350946244e-05, + "loss": 0.5762, + "step": 960 + }, + { + "epoch": 0.65, + "learning_rate": 1.8757445938481654e-05, + "loss": 0.5178, + "step": 961 + }, + { + "epoch": 0.65, + "learning_rate": 1.8754887068096672e-05, + "loss": 0.5271, + "step": 962 + }, + { + "epoch": 0.65, + "learning_rate": 1.875232574050948e-05, + "loss": 0.5369, + "step": 963 + }, + { + "epoch": 0.65, + "learning_rate": 1.8749761956438956e-05, + "loss": 0.6264, + "step": 964 + }, + { + "epoch": 0.65, + "learning_rate": 1.8747195716604675e-05, + "loss": 0.5599, + "step": 965 + }, + { + "epoch": 0.65, + "learning_rate": 1.8744627021726887e-05, + "loss": 0.5862, + "step": 966 + }, + { + "epoch": 0.65, + "learning_rate": 1.874205587252654e-05, + "loss": 0.5156, + "step": 967 + }, + { + "epoch": 0.65, + "learning_rate": 1.8739482269725272e-05, + "loss": 0.5341, + "step": 968 + }, + { + "epoch": 0.65, + "learning_rate": 1.8736906214045405e-05, + "loss": 0.518, + "step": 969 + }, + { + "epoch": 0.65, + "learning_rate": 1.873432770620995e-05, + "loss": 0.5551, + "step": 970 + }, + { + "epoch": 0.65, + "learning_rate": 1.8731746746942606e-05, + "loss": 0.5534, + "step": 971 + }, + { + "epoch": 0.65, + "learning_rate": 1.8729163336967767e-05, + "loss": 0.5365, + "step": 972 + }, + { + "epoch": 0.66, + "learning_rate": 1.8726577477010506e-05, + "loss": 0.5266, + "step": 973 + }, + { + "epoch": 0.66, + "learning_rate": 1.8723989167796585e-05, + "loss": 0.5612, + "step": 974 + }, + { + "epoch": 0.66, + "learning_rate": 1.872139841005246e-05, + "loss": 0.5674, + "step": 975 + }, + { + "epoch": 0.66, + "learning_rate": 1.8718805204505265e-05, + "loss": 0.56, + "step": 976 + }, + { + "epoch": 0.66, + "learning_rate": 1.8716209551882826e-05, + "loss": 0.5646, + "step": 977 + }, + { + "epoch": 0.66, + "learning_rate": 1.8713611452913657e-05, + "loss": 0.5802, + "step": 978 + }, + { + "epoch": 0.66, + "learning_rate": 1.8711010908326957e-05, + "loss": 0.5417, + "step": 979 + }, + { + "epoch": 0.66, + "learning_rate": 1.8708407918852608e-05, + "loss": 0.5585, + "step": 980 + }, + { + "epoch": 0.66, + "learning_rate": 1.870580248522119e-05, + "loss": 0.5539, + "step": 981 + }, + { + "epoch": 0.66, + "learning_rate": 1.870319460816395e-05, + "loss": 0.5482, + "step": 982 + }, + { + "epoch": 0.66, + "learning_rate": 1.8700584288412836e-05, + "loss": 0.5743, + "step": 983 + }, + { + "epoch": 0.66, + "learning_rate": 1.8697971526700482e-05, + "loss": 0.5737, + "step": 984 + }, + { + "epoch": 0.66, + "learning_rate": 1.8695356323760197e-05, + "loss": 0.5375, + "step": 985 + }, + { + "epoch": 0.66, + "learning_rate": 1.8692738680325976e-05, + "loss": 0.5992, + "step": 986 + }, + { + "epoch": 0.66, + "learning_rate": 1.8690118597132514e-05, + "loss": 0.5642, + "step": 987 + }, + { + "epoch": 0.67, + "learning_rate": 1.868749607491517e-05, + "loss": 0.5322, + "step": 988 + }, + { + "epoch": 0.67, + "learning_rate": 1.8684871114410006e-05, + "loss": 0.5381, + "step": 989 + }, + { + "epoch": 0.67, + "learning_rate": 1.8682243716353754e-05, + "loss": 0.5528, + "step": 990 + }, + { + "epoch": 0.67, + "learning_rate": 1.867961388148384e-05, + "loss": 0.5554, + "step": 991 + }, + { + "epoch": 0.67, + "learning_rate": 1.8676981610538366e-05, + "loss": 0.5264, + "step": 992 + }, + { + "epoch": 0.67, + "learning_rate": 1.867434690425613e-05, + "loss": 0.5763, + "step": 993 + }, + { + "epoch": 0.67, + "learning_rate": 1.8671709763376596e-05, + "loss": 0.6016, + "step": 994 + }, + { + "epoch": 0.67, + "learning_rate": 1.8669070188639924e-05, + "loss": 0.5204, + "step": 995 + }, + { + "epoch": 0.67, + "learning_rate": 1.8666428180786956e-05, + "loss": 0.5293, + "step": 996 + }, + { + "epoch": 0.67, + "learning_rate": 1.8663783740559214e-05, + "loss": 0.5339, + "step": 997 + }, + { + "epoch": 0.67, + "learning_rate": 1.8661136868698903e-05, + "loss": 0.5588, + "step": 998 + }, + { + "epoch": 0.67, + "learning_rate": 1.865848756594891e-05, + "loss": 0.5394, + "step": 999 + }, + { + "epoch": 0.67, + "learning_rate": 1.8655835833052808e-05, + "loss": 0.5522, + "step": 1000 + }, + { + "epoch": 0.67, + "learning_rate": 1.8653181670754845e-05, + "loss": 0.6408, + "step": 1001 + }, + { + "epoch": 0.67, + "learning_rate": 1.8650525079799955e-05, + "loss": 0.6042, + "step": 1002 + }, + { + "epoch": 0.68, + "learning_rate": 1.8647866060933756e-05, + "loss": 0.5654, + "step": 1003 + }, + { + "epoch": 0.68, + "learning_rate": 1.8645204614902548e-05, + "loss": 0.5483, + "step": 1004 + }, + { + "epoch": 0.68, + "learning_rate": 1.8642540742453302e-05, + "loss": 0.5671, + "step": 1005 + }, + { + "epoch": 0.68, + "learning_rate": 1.8639874444333687e-05, + "loss": 0.5773, + "step": 1006 + }, + { + "epoch": 0.68, + "learning_rate": 1.8637205721292033e-05, + "loss": 0.5795, + "step": 1007 + }, + { + "epoch": 0.68, + "learning_rate": 1.8634534574077365e-05, + "loss": 0.5965, + "step": 1008 + }, + { + "epoch": 0.68, + "learning_rate": 1.8631861003439388e-05, + "loss": 0.5701, + "step": 1009 + }, + { + "epoch": 0.68, + "learning_rate": 1.8629185010128478e-05, + "loss": 0.5733, + "step": 1010 + }, + { + "epoch": 0.68, + "learning_rate": 1.8626506594895696e-05, + "loss": 0.5599, + "step": 1011 + }, + { + "epoch": 0.68, + "learning_rate": 1.8623825758492788e-05, + "loss": 0.5474, + "step": 1012 + }, + { + "epoch": 0.68, + "learning_rate": 1.8621142501672175e-05, + "loss": 0.5942, + "step": 1013 + }, + { + "epoch": 0.68, + "learning_rate": 1.861845682518695e-05, + "loss": 0.578, + "step": 1014 + }, + { + "epoch": 0.68, + "learning_rate": 1.8615768729790893e-05, + "loss": 0.5267, + "step": 1015 + }, + { + "epoch": 0.68, + "learning_rate": 1.8613078216238468e-05, + "loss": 0.548, + "step": 1016 + }, + { + "epoch": 0.68, + "learning_rate": 1.8610385285284806e-05, + "loss": 0.5534, + "step": 1017 + }, + { + "epoch": 0.69, + "learning_rate": 1.8607689937685728e-05, + "loss": 0.554, + "step": 1018 + }, + { + "epoch": 0.69, + "learning_rate": 1.8604992174197716e-05, + "loss": 0.5538, + "step": 1019 + }, + { + "epoch": 0.69, + "learning_rate": 1.8602291995577957e-05, + "loss": 0.5392, + "step": 1020 + }, + { + "epoch": 0.69, + "learning_rate": 1.8599589402584288e-05, + "loss": 0.552, + "step": 1021 + }, + { + "epoch": 0.69, + "learning_rate": 1.8596884395975242e-05, + "loss": 0.5482, + "step": 1022 + }, + { + "epoch": 0.69, + "learning_rate": 1.859417697651002e-05, + "loss": 0.5613, + "step": 1023 + }, + { + "epoch": 0.69, + "learning_rate": 1.859146714494851e-05, + "loss": 0.5874, + "step": 1024 + }, + { + "epoch": 0.69, + "learning_rate": 1.8588754902051262e-05, + "loss": 0.5678, + "step": 1025 + }, + { + "epoch": 0.69, + "learning_rate": 1.8586040248579515e-05, + "loss": 0.5738, + "step": 1026 + }, + { + "epoch": 0.69, + "learning_rate": 1.858332318529518e-05, + "loss": 0.5775, + "step": 1027 + }, + { + "epoch": 0.69, + "learning_rate": 1.858060371296085e-05, + "loss": 0.569, + "step": 1028 + }, + { + "epoch": 0.69, + "learning_rate": 1.857788183233978e-05, + "loss": 0.5369, + "step": 1029 + }, + { + "epoch": 0.69, + "learning_rate": 1.857515754419592e-05, + "loss": 0.5448, + "step": 1030 + }, + { + "epoch": 0.69, + "learning_rate": 1.8572430849293882e-05, + "loss": 0.5283, + "step": 1031 + }, + { + "epoch": 0.69, + "learning_rate": 1.8569701748398954e-05, + "loss": 0.5402, + "step": 1032 + }, + { + "epoch": 0.7, + "learning_rate": 1.856697024227711e-05, + "loss": 0.5652, + "step": 1033 + }, + { + "epoch": 0.7, + "learning_rate": 1.8564236331694988e-05, + "loss": 0.5265, + "step": 1034 + }, + { + "epoch": 0.7, + "learning_rate": 1.8561500017419902e-05, + "loss": 0.5431, + "step": 1035 + }, + { + "epoch": 0.7, + "learning_rate": 1.8558761300219846e-05, + "loss": 0.5301, + "step": 1036 + }, + { + "epoch": 0.7, + "learning_rate": 1.8556020180863487e-05, + "loss": 0.5852, + "step": 1037 + }, + { + "epoch": 0.7, + "learning_rate": 1.8553276660120164e-05, + "loss": 0.5673, + "step": 1038 + }, + { + "epoch": 0.7, + "learning_rate": 1.855053073875989e-05, + "loss": 0.5536, + "step": 1039 + }, + { + "epoch": 0.7, + "learning_rate": 1.8547782417553355e-05, + "loss": 0.5695, + "step": 1040 + }, + { + "epoch": 0.7, + "learning_rate": 1.854503169727192e-05, + "loss": 0.547, + "step": 1041 + }, + { + "epoch": 0.7, + "learning_rate": 1.8542278578687614e-05, + "loss": 0.5649, + "step": 1042 + }, + { + "epoch": 0.7, + "learning_rate": 1.853952306257315e-05, + "loss": 0.5144, + "step": 1043 + }, + { + "epoch": 0.7, + "learning_rate": 1.853676514970191e-05, + "loss": 0.5283, + "step": 1044 + }, + { + "epoch": 0.7, + "learning_rate": 1.8534004840847943e-05, + "loss": 0.5587, + "step": 1045 + }, + { + "epoch": 0.7, + "learning_rate": 1.853124213678598e-05, + "loss": 0.5484, + "step": 1046 + }, + { + "epoch": 0.7, + "learning_rate": 1.8528477038291416e-05, + "loss": 0.5718, + "step": 1047 + }, + { + "epoch": 0.71, + "learning_rate": 1.8525709546140314e-05, + "loss": 0.5829, + "step": 1048 + }, + { + "epoch": 0.71, + "learning_rate": 1.852293966110943e-05, + "loss": 0.6006, + "step": 1049 + }, + { + "epoch": 0.71, + "learning_rate": 1.8520167383976168e-05, + "loss": 0.5346, + "step": 1050 + }, + { + "epoch": 0.71, + "learning_rate": 1.851739271551862e-05, + "loss": 0.5626, + "step": 1051 + }, + { + "epoch": 0.71, + "learning_rate": 1.8514615656515534e-05, + "loss": 0.553, + "step": 1052 + }, + { + "epoch": 0.71, + "learning_rate": 1.851183620774634e-05, + "loss": 0.5863, + "step": 1053 + }, + { + "epoch": 0.71, + "learning_rate": 1.8509054369991136e-05, + "loss": 0.5691, + "step": 1054 + }, + { + "epoch": 0.71, + "learning_rate": 1.850627014403069e-05, + "loss": 0.5806, + "step": 1055 + }, + { + "epoch": 0.71, + "learning_rate": 1.8503483530646443e-05, + "loss": 0.6041, + "step": 1056 + }, + { + "epoch": 0.71, + "learning_rate": 1.85006945306205e-05, + "loss": 0.5592, + "step": 1057 + }, + { + "epoch": 0.71, + "learning_rate": 1.8497903144735643e-05, + "loss": 0.5467, + "step": 1058 + }, + { + "epoch": 0.71, + "learning_rate": 1.8495109373775317e-05, + "loss": 0.5648, + "step": 1059 + }, + { + "epoch": 0.71, + "learning_rate": 1.849231321852364e-05, + "loss": 0.5405, + "step": 1060 + }, + { + "epoch": 0.71, + "learning_rate": 1.8489514679765402e-05, + "loss": 0.5315, + "step": 1061 + }, + { + "epoch": 0.72, + "learning_rate": 1.8486713758286052e-05, + "loss": 0.5274, + "step": 1062 + }, + { + "epoch": 0.72, + "learning_rate": 1.8483910454871724e-05, + "loss": 0.5696, + "step": 1063 + }, + { + "epoch": 0.72, + "learning_rate": 1.84811047703092e-05, + "loss": 0.54, + "step": 1064 + }, + { + "epoch": 0.72, + "learning_rate": 1.8478296705385953e-05, + "loss": 0.5545, + "step": 1065 + }, + { + "epoch": 0.72, + "learning_rate": 1.84754862608901e-05, + "loss": 0.5261, + "step": 1066 + }, + { + "epoch": 0.72, + "learning_rate": 1.8472673437610448e-05, + "loss": 0.5648, + "step": 1067 + }, + { + "epoch": 0.72, + "learning_rate": 1.8469858236336456e-05, + "loss": 0.5383, + "step": 1068 + }, + { + "epoch": 0.72, + "learning_rate": 1.846704065785826e-05, + "loss": 0.5471, + "step": 1069 + }, + { + "epoch": 0.72, + "learning_rate": 1.8464220702966656e-05, + "loss": 0.5655, + "step": 1070 + }, + { + "epoch": 0.72, + "learning_rate": 1.8461398372453113e-05, + "loss": 0.56, + "step": 1071 + }, + { + "epoch": 0.72, + "learning_rate": 1.8458573667109765e-05, + "loss": 0.5315, + "step": 1072 + }, + { + "epoch": 0.72, + "learning_rate": 1.845574658772941e-05, + "loss": 0.5664, + "step": 1073 + }, + { + "epoch": 0.72, + "learning_rate": 1.8452917135105513e-05, + "loss": 0.6063, + "step": 1074 + }, + { + "epoch": 0.72, + "learning_rate": 1.8450085310032206e-05, + "loss": 0.5747, + "step": 1075 + }, + { + "epoch": 0.72, + "learning_rate": 1.844725111330429e-05, + "loss": 0.5459, + "step": 1076 + }, + { + "epoch": 0.73, + "learning_rate": 1.844441454571723e-05, + "loss": 0.5482, + "step": 1077 + }, + { + "epoch": 0.73, + "learning_rate": 1.844157560806715e-05, + "loss": 0.5597, + "step": 1078 + }, + { + "epoch": 0.73, + "learning_rate": 1.8438734301150845e-05, + "loss": 0.5262, + "step": 1079 + }, + { + "epoch": 0.73, + "learning_rate": 1.8435890625765776e-05, + "loss": 0.5731, + "step": 1080 + }, + { + "epoch": 0.73, + "learning_rate": 1.8433044582710067e-05, + "loss": 0.5268, + "step": 1081 + }, + { + "epoch": 0.73, + "learning_rate": 1.8430196172782505e-05, + "loss": 0.5854, + "step": 1082 + }, + { + "epoch": 0.73, + "learning_rate": 1.8427345396782547e-05, + "loss": 0.5544, + "step": 1083 + }, + { + "epoch": 0.73, + "learning_rate": 1.8424492255510305e-05, + "loss": 0.5715, + "step": 1084 + }, + { + "epoch": 0.73, + "learning_rate": 1.8421636749766563e-05, + "loss": 0.5682, + "step": 1085 + }, + { + "epoch": 0.73, + "learning_rate": 1.841877888035276e-05, + "loss": 0.561, + "step": 1086 + }, + { + "epoch": 0.73, + "learning_rate": 1.8415918648071014e-05, + "loss": 0.5562, + "step": 1087 + }, + { + "epoch": 0.73, + "learning_rate": 1.8413056053724086e-05, + "loss": 0.5626, + "step": 1088 + }, + { + "epoch": 0.73, + "learning_rate": 1.841019109811541e-05, + "loss": 0.5773, + "step": 1089 + }, + { + "epoch": 0.73, + "learning_rate": 1.8407323782049093e-05, + "loss": 0.5336, + "step": 1090 + }, + { + "epoch": 0.73, + "learning_rate": 1.8404454106329886e-05, + "loss": 0.516, + "step": 1091 + }, + { + "epoch": 0.74, + "learning_rate": 1.840158207176321e-05, + "loss": 0.5546, + "step": 1092 + }, + { + "epoch": 0.74, + "learning_rate": 1.8398707679155154e-05, + "loss": 0.5857, + "step": 1093 + }, + { + "epoch": 0.74, + "learning_rate": 1.8395830929312457e-05, + "loss": 0.5659, + "step": 1094 + }, + { + "epoch": 0.74, + "learning_rate": 1.8392951823042525e-05, + "loss": 0.5407, + "step": 1095 + }, + { + "epoch": 0.74, + "learning_rate": 1.8390070361153436e-05, + "loss": 0.562, + "step": 1096 + }, + { + "epoch": 0.74, + "learning_rate": 1.838718654445391e-05, + "loss": 0.5819, + "step": 1097 + }, + { + "epoch": 0.74, + "learning_rate": 1.838430037375334e-05, + "loss": 0.5558, + "step": 1098 + }, + { + "epoch": 0.74, + "learning_rate": 1.838141184986178e-05, + "loss": 0.576, + "step": 1099 + }, + { + "epoch": 0.74, + "learning_rate": 1.8378520973589937e-05, + "loss": 0.5851, + "step": 1100 + }, + { + "epoch": 0.74, + "learning_rate": 1.8375627745749184e-05, + "loss": 0.5467, + "step": 1101 + }, + { + "epoch": 0.74, + "learning_rate": 1.8372732167151556e-05, + "loss": 0.5627, + "step": 1102 + }, + { + "epoch": 0.74, + "learning_rate": 1.836983423860974e-05, + "loss": 0.5335, + "step": 1103 + }, + { + "epoch": 0.74, + "learning_rate": 1.836693396093709e-05, + "loss": 0.566, + "step": 1104 + }, + { + "epoch": 0.74, + "learning_rate": 1.8364031334947612e-05, + "loss": 0.5558, + "step": 1105 + }, + { + "epoch": 0.74, + "learning_rate": 1.836112636145598e-05, + "loss": 0.5288, + "step": 1106 + }, + { + "epoch": 0.75, + "learning_rate": 1.8358219041277523e-05, + "loss": 0.5459, + "step": 1107 + }, + { + "epoch": 0.75, + "learning_rate": 1.8355309375228225e-05, + "loss": 0.5884, + "step": 1108 + }, + { + "epoch": 0.75, + "learning_rate": 1.835239736412473e-05, + "loss": 0.5693, + "step": 1109 + }, + { + "epoch": 0.75, + "learning_rate": 1.8349483008784346e-05, + "loss": 0.5611, + "step": 1110 + }, + { + "epoch": 0.75, + "learning_rate": 1.834656631002503e-05, + "loss": 0.5495, + "step": 1111 + }, + { + "epoch": 0.75, + "learning_rate": 1.8343647268665407e-05, + "loss": 0.5381, + "step": 1112 + }, + { + "epoch": 0.75, + "learning_rate": 1.8340725885524747e-05, + "loss": 0.5539, + "step": 1113 + }, + { + "epoch": 0.75, + "learning_rate": 1.8337802161422987e-05, + "loss": 0.5514, + "step": 1114 + }, + { + "epoch": 0.75, + "learning_rate": 1.833487609718072e-05, + "loss": 0.5761, + "step": 1115 + }, + { + "epoch": 0.75, + "learning_rate": 1.8331947693619195e-05, + "loss": 0.5259, + "step": 1116 + }, + { + "epoch": 0.75, + "learning_rate": 1.8329016951560307e-05, + "loss": 0.5635, + "step": 1117 + }, + { + "epoch": 0.75, + "learning_rate": 1.8326083871826626e-05, + "loss": 0.5649, + "step": 1118 + }, + { + "epoch": 0.75, + "learning_rate": 1.8323148455241363e-05, + "loss": 0.5648, + "step": 1119 + }, + { + "epoch": 0.75, + "learning_rate": 1.8320210702628397e-05, + "loss": 0.5527, + "step": 1120 + }, + { + "epoch": 0.75, + "learning_rate": 1.831727061481225e-05, + "loss": 0.5816, + "step": 1121 + }, + { + "epoch": 0.76, + "learning_rate": 1.831432819261811e-05, + "loss": 0.5715, + "step": 1122 + }, + { + "epoch": 0.76, + "learning_rate": 1.8311383436871814e-05, + "loss": 0.5798, + "step": 1123 + }, + { + "epoch": 0.76, + "learning_rate": 1.8308436348399855e-05, + "loss": 0.5713, + "step": 1124 + }, + { + "epoch": 0.76, + "learning_rate": 1.8305486928029383e-05, + "loss": 0.5625, + "step": 1125 + }, + { + "epoch": 0.76, + "learning_rate": 1.83025351765882e-05, + "loss": 0.5452, + "step": 1126 + }, + { + "epoch": 0.76, + "learning_rate": 1.8299581094904765e-05, + "loss": 0.5403, + "step": 1127 + }, + { + "epoch": 0.76, + "learning_rate": 1.829662468380819e-05, + "loss": 0.5742, + "step": 1128 + }, + { + "epoch": 0.76, + "learning_rate": 1.8293665944128237e-05, + "loss": 0.5546, + "step": 1129 + }, + { + "epoch": 0.76, + "learning_rate": 1.8290704876695325e-05, + "loss": 0.544, + "step": 1130 + }, + { + "epoch": 0.76, + "learning_rate": 1.828774148234053e-05, + "loss": 0.5635, + "step": 1131 + }, + { + "epoch": 0.76, + "learning_rate": 1.828477576189557e-05, + "loss": 0.5788, + "step": 1132 + }, + { + "epoch": 0.76, + "learning_rate": 1.828180771619283e-05, + "loss": 0.5658, + "step": 1133 + }, + { + "epoch": 0.76, + "learning_rate": 1.8278837346065336e-05, + "loss": 0.5797, + "step": 1134 + }, + { + "epoch": 0.76, + "learning_rate": 1.8275864652346772e-05, + "loss": 0.5697, + "step": 1135 + }, + { + "epoch": 0.76, + "learning_rate": 1.8272889635871475e-05, + "loss": 0.5545, + "step": 1136 + }, + { + "epoch": 0.77, + "learning_rate": 1.826991229747443e-05, + "loss": 0.5676, + "step": 1137 + }, + { + "epoch": 0.77, + "learning_rate": 1.8266932637991278e-05, + "loss": 0.5892, + "step": 1138 + }, + { + "epoch": 0.77, + "learning_rate": 1.8263950658258306e-05, + "loss": 0.5726, + "step": 1139 + }, + { + "epoch": 0.77, + "learning_rate": 1.826096635911246e-05, + "loss": 0.5479, + "step": 1140 + }, + { + "epoch": 0.77, + "learning_rate": 1.8257979741391326e-05, + "loss": 0.5627, + "step": 1141 + }, + { + "epoch": 0.77, + "learning_rate": 1.825499080593315e-05, + "loss": 0.5281, + "step": 1142 + }, + { + "epoch": 0.77, + "learning_rate": 1.825199955357683e-05, + "loss": 0.5572, + "step": 1143 + }, + { + "epoch": 0.77, + "learning_rate": 1.82490059851619e-05, + "loss": 0.5584, + "step": 1144 + }, + { + "epoch": 0.77, + "learning_rate": 1.8246010101528566e-05, + "loss": 0.5633, + "step": 1145 + }, + { + "epoch": 0.77, + "learning_rate": 1.8243011903517663e-05, + "loss": 0.5161, + "step": 1146 + }, + { + "epoch": 0.77, + "learning_rate": 1.8240011391970688e-05, + "loss": 0.5349, + "step": 1147 + }, + { + "epoch": 0.77, + "learning_rate": 1.8237008567729783e-05, + "loss": 0.5578, + "step": 1148 + }, + { + "epoch": 0.77, + "learning_rate": 1.8234003431637734e-05, + "loss": 0.5572, + "step": 1149 + }, + { + "epoch": 0.77, + "learning_rate": 1.823099598453799e-05, + "loss": 0.5674, + "step": 1150 + }, + { + "epoch": 0.78, + "learning_rate": 1.8227986227274638e-05, + "loss": 0.56, + "step": 1151 + }, + { + "epoch": 0.78, + "learning_rate": 1.822497416069241e-05, + "loss": 0.5024, + "step": 1152 + }, + { + "epoch": 0.78, + "learning_rate": 1.8221959785636703e-05, + "loss": 0.5567, + "step": 1153 + }, + { + "epoch": 0.78, + "learning_rate": 1.8218943102953545e-05, + "loss": 0.5724, + "step": 1154 + }, + { + "epoch": 0.78, + "learning_rate": 1.8215924113489613e-05, + "loss": 0.5166, + "step": 1155 + }, + { + "epoch": 0.78, + "learning_rate": 1.8212902818092236e-05, + "loss": 0.5596, + "step": 1156 + }, + { + "epoch": 0.78, + "learning_rate": 1.8209879217609394e-05, + "loss": 0.5518, + "step": 1157 + }, + { + "epoch": 0.78, + "learning_rate": 1.8206853312889713e-05, + "loss": 0.5058, + "step": 1158 + }, + { + "epoch": 0.78, + "learning_rate": 1.8203825104782457e-05, + "loss": 0.5774, + "step": 1159 + }, + { + "epoch": 0.78, + "learning_rate": 1.820079459413754e-05, + "loss": 0.5464, + "step": 1160 + }, + { + "epoch": 0.78, + "learning_rate": 1.8197761781805532e-05, + "loss": 0.5645, + "step": 1161 + }, + { + "epoch": 0.78, + "learning_rate": 1.8194726668637636e-05, + "loss": 0.5986, + "step": 1162 + }, + { + "epoch": 0.78, + "learning_rate": 1.8191689255485707e-05, + "loss": 0.5357, + "step": 1163 + }, + { + "epoch": 0.78, + "learning_rate": 1.8188649543202244e-05, + "loss": 0.5441, + "step": 1164 + }, + { + "epoch": 0.78, + "learning_rate": 1.8185607532640396e-05, + "loss": 0.6002, + "step": 1165 + }, + { + "epoch": 0.79, + "learning_rate": 1.8182563224653947e-05, + "loss": 0.5582, + "step": 1166 + }, + { + "epoch": 0.79, + "learning_rate": 1.817951662009734e-05, + "loss": 0.5546, + "step": 1167 + }, + { + "epoch": 0.79, + "learning_rate": 1.8176467719825645e-05, + "loss": 0.5573, + "step": 1168 + }, + { + "epoch": 0.79, + "learning_rate": 1.8173416524694595e-05, + "loss": 0.5548, + "step": 1169 + }, + { + "epoch": 0.79, + "learning_rate": 1.8170363035560544e-05, + "loss": 0.5505, + "step": 1170 + }, + { + "epoch": 0.79, + "learning_rate": 1.816730725328052e-05, + "loss": 0.5436, + "step": 1171 + }, + { + "epoch": 0.79, + "learning_rate": 1.816424917871217e-05, + "loss": 0.5785, + "step": 1172 + }, + { + "epoch": 0.79, + "learning_rate": 1.816118881271379e-05, + "loss": 0.5582, + "step": 1173 + }, + { + "epoch": 0.79, + "learning_rate": 1.815812615614433e-05, + "loss": 0.5828, + "step": 1174 + }, + { + "epoch": 0.79, + "learning_rate": 1.8155061209863368e-05, + "loss": 0.5446, + "step": 1175 + }, + { + "epoch": 0.79, + "learning_rate": 1.815199397473113e-05, + "loss": 0.5474, + "step": 1176 + }, + { + "epoch": 0.79, + "learning_rate": 1.814892445160849e-05, + "loss": 0.5278, + "step": 1177 + }, + { + "epoch": 0.79, + "learning_rate": 1.8145852641356963e-05, + "loss": 0.5898, + "step": 1178 + }, + { + "epoch": 0.79, + "learning_rate": 1.8142778544838695e-05, + "loss": 0.5891, + "step": 1179 + }, + { + "epoch": 0.79, + "learning_rate": 1.8139702162916485e-05, + "loss": 0.5793, + "step": 1180 + }, + { + "epoch": 0.8, + "learning_rate": 1.813662349645377e-05, + "loss": 0.5586, + "step": 1181 + }, + { + "epoch": 0.8, + "learning_rate": 1.8133542546314628e-05, + "loss": 0.5638, + "step": 1182 + }, + { + "epoch": 0.8, + "learning_rate": 1.813045931336378e-05, + "loss": 0.5568, + "step": 1183 + }, + { + "epoch": 0.8, + "learning_rate": 1.812737379846658e-05, + "loss": 0.5527, + "step": 1184 + }, + { + "epoch": 0.8, + "learning_rate": 1.8124286002489034e-05, + "loss": 0.5288, + "step": 1185 + }, + { + "epoch": 0.8, + "learning_rate": 1.812119592629778e-05, + "loss": 0.5394, + "step": 1186 + }, + { + "epoch": 0.8, + "learning_rate": 1.8118103570760097e-05, + "loss": 0.5451, + "step": 1187 + }, + { + "epoch": 0.8, + "learning_rate": 1.8115008936743908e-05, + "loss": 0.5415, + "step": 1188 + }, + { + "epoch": 0.8, + "learning_rate": 1.8111912025117774e-05, + "loss": 0.5766, + "step": 1189 + }, + { + "epoch": 0.8, + "learning_rate": 1.8108812836750887e-05, + "loss": 0.537, + "step": 1190 + }, + { + "epoch": 0.8, + "learning_rate": 1.810571137251309e-05, + "loss": 0.5629, + "step": 1191 + }, + { + "epoch": 0.8, + "learning_rate": 1.8102607633274863e-05, + "loss": 0.6042, + "step": 1192 + }, + { + "epoch": 0.8, + "learning_rate": 1.809950161990731e-05, + "loss": 0.5561, + "step": 1193 + }, + { + "epoch": 0.8, + "learning_rate": 1.8096393333282195e-05, + "loss": 0.5187, + "step": 1194 + }, + { + "epoch": 0.8, + "learning_rate": 1.8093282774271908e-05, + "loss": 0.5787, + "step": 1195 + }, + { + "epoch": 0.81, + "learning_rate": 1.8090169943749477e-05, + "loss": 0.5906, + "step": 1196 + }, + { + "epoch": 0.81, + "learning_rate": 1.8087054842588565e-05, + "loss": 0.5603, + "step": 1197 + }, + { + "epoch": 0.81, + "learning_rate": 1.808393747166348e-05, + "loss": 0.5595, + "step": 1198 + }, + { + "epoch": 0.81, + "learning_rate": 1.808081783184916e-05, + "loss": 0.5911, + "step": 1199 + }, + { + "epoch": 0.81, + "learning_rate": 1.807769592402119e-05, + "loss": 0.5518, + "step": 1200 + }, + { + "epoch": 0.81, + "learning_rate": 1.8074571749055778e-05, + "loss": 0.5511, + "step": 1201 + }, + { + "epoch": 0.81, + "learning_rate": 1.8071445307829775e-05, + "loss": 0.5431, + "step": 1202 + }, + { + "epoch": 0.81, + "learning_rate": 1.8068316601220667e-05, + "loss": 0.5305, + "step": 1203 + }, + { + "epoch": 0.81, + "learning_rate": 1.8065185630106583e-05, + "loss": 0.5709, + "step": 1204 + }, + { + "epoch": 0.81, + "learning_rate": 1.8062052395366275e-05, + "loss": 0.5471, + "step": 1205 + }, + { + "epoch": 0.81, + "learning_rate": 1.805891689787914e-05, + "loss": 0.521, + "step": 1206 + }, + { + "epoch": 0.81, + "learning_rate": 1.8055779138525205e-05, + "loss": 0.565, + "step": 1207 + }, + { + "epoch": 0.81, + "learning_rate": 1.8052639118185132e-05, + "loss": 0.5485, + "step": 1208 + }, + { + "epoch": 0.81, + "learning_rate": 1.8049496837740223e-05, + "loss": 0.5483, + "step": 1209 + }, + { + "epoch": 0.81, + "learning_rate": 1.8046352298072408e-05, + "loss": 0.5808, + "step": 1210 + }, + { + "epoch": 0.82, + "learning_rate": 1.804320550006425e-05, + "loss": 0.5419, + "step": 1211 + }, + { + "epoch": 0.82, + "learning_rate": 1.8040056444598958e-05, + "loss": 0.5123, + "step": 1212 + }, + { + "epoch": 0.82, + "learning_rate": 1.8036905132560362e-05, + "loss": 0.5991, + "step": 1213 + }, + { + "epoch": 0.82, + "learning_rate": 1.8033751564832926e-05, + "loss": 0.5723, + "step": 1214 + }, + { + "epoch": 0.82, + "learning_rate": 1.803059574230175e-05, + "loss": 0.5062, + "step": 1215 + }, + { + "epoch": 0.82, + "learning_rate": 1.8027437665852574e-05, + "loss": 0.5779, + "step": 1216 + }, + { + "epoch": 0.82, + "learning_rate": 1.8024277336371755e-05, + "loss": 0.5962, + "step": 1217 + }, + { + "epoch": 0.82, + "learning_rate": 1.80211147547463e-05, + "loss": 0.575, + "step": 1218 + }, + { + "epoch": 0.82, + "learning_rate": 1.8017949921863832e-05, + "loss": 0.5319, + "step": 1219 + }, + { + "epoch": 0.82, + "learning_rate": 1.8014782838612616e-05, + "loss": 0.5337, + "step": 1220 + }, + { + "epoch": 0.82, + "learning_rate": 1.8011613505881545e-05, + "loss": 0.5396, + "step": 1221 + }, + { + "epoch": 0.82, + "learning_rate": 1.800844192456015e-05, + "loss": 0.5938, + "step": 1222 + }, + { + "epoch": 0.82, + "learning_rate": 1.800526809553858e-05, + "loss": 0.5384, + "step": 1223 + }, + { + "epoch": 0.82, + "learning_rate": 1.800209201970762e-05, + "loss": 0.5173, + "step": 1224 + }, + { + "epoch": 0.82, + "learning_rate": 1.7998913697958693e-05, + "loss": 0.5705, + "step": 1225 + }, + { + "epoch": 0.83, + "learning_rate": 1.7995733131183846e-05, + "loss": 0.5306, + "step": 1226 + }, + { + "epoch": 0.83, + "learning_rate": 1.799255032027576e-05, + "loss": 0.5502, + "step": 1227 + }, + { + "epoch": 0.83, + "learning_rate": 1.7989365266127735e-05, + "loss": 0.5615, + "step": 1228 + }, + { + "epoch": 0.83, + "learning_rate": 1.7986177969633717e-05, + "loss": 0.5426, + "step": 1229 + }, + { + "epoch": 0.83, + "learning_rate": 1.7982988431688266e-05, + "loss": 0.5941, + "step": 1230 + }, + { + "epoch": 0.83, + "learning_rate": 1.797979665318658e-05, + "loss": 0.5623, + "step": 1231 + }, + { + "epoch": 0.83, + "learning_rate": 1.7976602635024485e-05, + "loss": 0.5853, + "step": 1232 + }, + { + "epoch": 0.83, + "learning_rate": 1.7973406378098434e-05, + "loss": 0.4976, + "step": 1233 + }, + { + "epoch": 0.83, + "learning_rate": 1.7970207883305512e-05, + "loss": 0.5476, + "step": 1234 + }, + { + "epoch": 0.83, + "learning_rate": 1.7967007151543425e-05, + "loss": 0.5423, + "step": 1235 + }, + { + "epoch": 0.83, + "learning_rate": 1.796380418371051e-05, + "loss": 0.5634, + "step": 1236 + }, + { + "epoch": 0.83, + "learning_rate": 1.7960598980705734e-05, + "loss": 0.5151, + "step": 1237 + }, + { + "epoch": 0.83, + "learning_rate": 1.795739154342869e-05, + "loss": 0.5512, + "step": 1238 + }, + { + "epoch": 0.83, + "learning_rate": 1.7954181872779598e-05, + "loss": 0.586, + "step": 1239 + }, + { + "epoch": 0.83, + "learning_rate": 1.7950969969659303e-05, + "loss": 0.5718, + "step": 1240 + }, + { + "epoch": 0.84, + "learning_rate": 1.794775583496928e-05, + "loss": 0.5715, + "step": 1241 + }, + { + "epoch": 0.84, + "learning_rate": 1.7944539469611625e-05, + "loss": 0.5841, + "step": 1242 + }, + { + "epoch": 0.84, + "learning_rate": 1.7941320874489065e-05, + "loss": 0.5748, + "step": 1243 + }, + { + "epoch": 0.84, + "learning_rate": 1.7938100050504953e-05, + "loss": 0.5816, + "step": 1244 + }, + { + "epoch": 0.84, + "learning_rate": 1.7934876998563263e-05, + "loss": 0.5453, + "step": 1245 + }, + { + "epoch": 0.84, + "learning_rate": 1.7931651719568603e-05, + "loss": 0.5642, + "step": 1246 + }, + { + "epoch": 0.84, + "learning_rate": 1.7928424214426196e-05, + "loss": 0.5858, + "step": 1247 + }, + { + "epoch": 0.84, + "learning_rate": 1.7925194484041893e-05, + "loss": 0.6241, + "step": 1248 + }, + { + "epoch": 0.84, + "learning_rate": 1.7921962529322173e-05, + "loss": 0.5986, + "step": 1249 + }, + { + "epoch": 0.84, + "learning_rate": 1.7918728351174136e-05, + "loss": 0.587, + "step": 1250 + }, + { + "epoch": 0.84, + "learning_rate": 1.791549195050551e-05, + "loss": 0.5381, + "step": 1251 + }, + { + "epoch": 0.84, + "learning_rate": 1.791225332822464e-05, + "loss": 0.5746, + "step": 1252 + }, + { + "epoch": 0.84, + "learning_rate": 1.7909012485240497e-05, + "loss": 0.5194, + "step": 1253 + }, + { + "epoch": 0.84, + "learning_rate": 1.790576942246268e-05, + "loss": 0.5305, + "step": 1254 + }, + { + "epoch": 0.85, + "learning_rate": 1.790252414080141e-05, + "loss": 0.5204, + "step": 1255 + }, + { + "epoch": 0.85, + "learning_rate": 1.7899276641167516e-05, + "loss": 0.5574, + "step": 1256 + }, + { + "epoch": 0.85, + "learning_rate": 1.7896026924472478e-05, + "loss": 0.5867, + "step": 1257 + }, + { + "epoch": 0.85, + "learning_rate": 1.789277499162837e-05, + "loss": 0.5388, + "step": 1258 + }, + { + "epoch": 0.85, + "learning_rate": 1.7889520843547908e-05, + "loss": 0.5574, + "step": 1259 + }, + { + "epoch": 0.85, + "learning_rate": 1.788626448114442e-05, + "loss": 0.5374, + "step": 1260 + }, + { + "epoch": 0.85, + "learning_rate": 1.7883005905331855e-05, + "loss": 0.5887, + "step": 1261 + }, + { + "epoch": 0.85, + "learning_rate": 1.787974511702479e-05, + "loss": 0.5416, + "step": 1262 + }, + { + "epoch": 0.85, + "learning_rate": 1.787648211713841e-05, + "loss": 0.5464, + "step": 1263 + }, + { + "epoch": 0.85, + "learning_rate": 1.7873216906588536e-05, + "loss": 0.5829, + "step": 1264 + }, + { + "epoch": 0.85, + "learning_rate": 1.7869949486291604e-05, + "loss": 0.54, + "step": 1265 + }, + { + "epoch": 0.85, + "learning_rate": 1.7866679857164663e-05, + "loss": 0.5434, + "step": 1266 + }, + { + "epoch": 0.85, + "learning_rate": 1.786340802012539e-05, + "loss": 0.5229, + "step": 1267 + }, + { + "epoch": 0.85, + "learning_rate": 1.7860133976092083e-05, + "loss": 0.5692, + "step": 1268 + }, + { + "epoch": 0.85, + "learning_rate": 1.7856857725983653e-05, + "loss": 0.5863, + "step": 1269 + }, + { + "epoch": 0.86, + "learning_rate": 1.7853579270719635e-05, + "loss": 0.5444, + "step": 1270 + }, + { + "epoch": 0.86, + "learning_rate": 1.785029861122018e-05, + "loss": 0.5632, + "step": 1271 + }, + { + "epoch": 0.86, + "learning_rate": 1.7847015748406055e-05, + "loss": 0.5744, + "step": 1272 + }, + { + "epoch": 0.86, + "learning_rate": 1.7843730683198658e-05, + "loss": 0.5433, + "step": 1273 + }, + { + "epoch": 0.86, + "learning_rate": 1.7840443416519985e-05, + "loss": 0.5505, + "step": 1274 + }, + { + "epoch": 0.86, + "learning_rate": 1.7837153949292674e-05, + "loss": 0.5459, + "step": 1275 + }, + { + "epoch": 0.86, + "learning_rate": 1.7833862282439956e-05, + "loss": 0.5358, + "step": 1276 + }, + { + "epoch": 0.86, + "learning_rate": 1.7830568416885697e-05, + "loss": 0.5456, + "step": 1277 + }, + { + "epoch": 0.86, + "learning_rate": 1.7827272353554376e-05, + "loss": 0.556, + "step": 1278 + }, + { + "epoch": 0.86, + "learning_rate": 1.782397409337108e-05, + "loss": 0.5292, + "step": 1279 + }, + { + "epoch": 0.86, + "learning_rate": 1.782067363726153e-05, + "loss": 0.5772, + "step": 1280 + }, + { + "epoch": 0.86, + "learning_rate": 1.7817370986152042e-05, + "loss": 0.551, + "step": 1281 + }, + { + "epoch": 0.86, + "learning_rate": 1.7814066140969565e-05, + "loss": 0.5204, + "step": 1282 + }, + { + "epoch": 0.86, + "learning_rate": 1.7810759102641657e-05, + "loss": 0.5896, + "step": 1283 + }, + { + "epoch": 0.86, + "learning_rate": 1.7807449872096492e-05, + "loss": 0.5536, + "step": 1284 + }, + { + "epoch": 0.87, + "learning_rate": 1.7804138450262862e-05, + "loss": 0.577, + "step": 1285 + }, + { + "epoch": 0.87, + "learning_rate": 1.7800824838070166e-05, + "loss": 0.5369, + "step": 1286 + }, + { + "epoch": 0.87, + "learning_rate": 1.779750903644843e-05, + "loss": 0.5514, + "step": 1287 + }, + { + "epoch": 0.87, + "learning_rate": 1.7794191046328283e-05, + "loss": 0.5477, + "step": 1288 + }, + { + "epoch": 0.87, + "learning_rate": 1.779087086864098e-05, + "loss": 0.5724, + "step": 1289 + }, + { + "epoch": 0.87, + "learning_rate": 1.7787548504318372e-05, + "loss": 0.544, + "step": 1290 + }, + { + "epoch": 0.87, + "learning_rate": 1.7784223954292944e-05, + "loss": 0.5232, + "step": 1291 + }, + { + "epoch": 0.87, + "learning_rate": 1.7780897219497783e-05, + "loss": 0.5792, + "step": 1292 + }, + { + "epoch": 0.87, + "learning_rate": 1.7777568300866587e-05, + "loss": 0.5664, + "step": 1293 + }, + { + "epoch": 0.87, + "learning_rate": 1.777423719933368e-05, + "loss": 0.5499, + "step": 1294 + }, + { + "epoch": 0.87, + "learning_rate": 1.7770903915833986e-05, + "loss": 0.5658, + "step": 1295 + }, + { + "epoch": 0.87, + "learning_rate": 1.7767568451303042e-05, + "loss": 0.5446, + "step": 1296 + }, + { + "epoch": 0.87, + "learning_rate": 1.7764230806677005e-05, + "loss": 0.5919, + "step": 1297 + }, + { + "epoch": 0.87, + "learning_rate": 1.7760890982892638e-05, + "loss": 0.5649, + "step": 1298 + }, + { + "epoch": 0.87, + "learning_rate": 1.775754898088732e-05, + "loss": 0.5406, + "step": 1299 + }, + { + "epoch": 0.88, + "learning_rate": 1.775420480159903e-05, + "loss": 0.5525, + "step": 1300 + }, + { + "epoch": 0.88, + "learning_rate": 1.775085844596638e-05, + "loss": 0.5279, + "step": 1301 + }, + { + "epoch": 0.88, + "learning_rate": 1.7747509914928568e-05, + "loss": 0.5058, + "step": 1302 + }, + { + "epoch": 0.88, + "learning_rate": 1.7744159209425416e-05, + "loss": 0.5327, + "step": 1303 + }, + { + "epoch": 0.88, + "learning_rate": 1.774080633039736e-05, + "loss": 0.5858, + "step": 1304 + }, + { + "epoch": 0.88, + "learning_rate": 1.7737451278785435e-05, + "loss": 0.5439, + "step": 1305 + }, + { + "epoch": 0.88, + "learning_rate": 1.773409405553129e-05, + "loss": 0.5526, + "step": 1306 + }, + { + "epoch": 0.88, + "learning_rate": 1.773073466157719e-05, + "loss": 0.5348, + "step": 1307 + }, + { + "epoch": 0.88, + "learning_rate": 1.7727373097866002e-05, + "loss": 0.498, + "step": 1308 + }, + { + "epoch": 0.88, + "learning_rate": 1.7724009365341204e-05, + "loss": 0.5656, + "step": 1309 + }, + { + "epoch": 0.88, + "learning_rate": 1.772064346494688e-05, + "loss": 0.5645, + "step": 1310 + }, + { + "epoch": 0.88, + "learning_rate": 1.7717275397627724e-05, + "loss": 0.5431, + "step": 1311 + }, + { + "epoch": 0.88, + "learning_rate": 1.7713905164329044e-05, + "loss": 0.5695, + "step": 1312 + }, + { + "epoch": 0.88, + "learning_rate": 1.771053276599675e-05, + "loss": 0.5572, + "step": 1313 + }, + { + "epoch": 0.88, + "learning_rate": 1.7707158203577356e-05, + "loss": 0.5517, + "step": 1314 + }, + { + "epoch": 0.89, + "learning_rate": 1.7703781478017995e-05, + "loss": 0.5426, + "step": 1315 + }, + { + "epoch": 0.89, + "learning_rate": 1.770040259026639e-05, + "loss": 0.5556, + "step": 1316 + }, + { + "epoch": 0.89, + "learning_rate": 1.7697021541270897e-05, + "loss": 0.5778, + "step": 1317 + }, + { + "epoch": 0.89, + "learning_rate": 1.769363833198045e-05, + "loss": 0.5986, + "step": 1318 + }, + { + "epoch": 0.89, + "learning_rate": 1.7690252963344606e-05, + "loss": 0.5727, + "step": 1319 + }, + { + "epoch": 0.89, + "learning_rate": 1.768686543631352e-05, + "loss": 0.536, + "step": 1320 + }, + { + "epoch": 0.89, + "learning_rate": 1.7683475751837963e-05, + "loss": 0.5263, + "step": 1321 + }, + { + "epoch": 0.89, + "learning_rate": 1.7680083910869305e-05, + "loss": 0.5217, + "step": 1322 + }, + { + "epoch": 0.89, + "learning_rate": 1.7676689914359517e-05, + "loss": 0.5059, + "step": 1323 + }, + { + "epoch": 0.89, + "learning_rate": 1.7673293763261182e-05, + "loss": 0.5205, + "step": 1324 + }, + { + "epoch": 0.89, + "learning_rate": 1.7669895458527487e-05, + "loss": 0.5552, + "step": 1325 + }, + { + "epoch": 0.89, + "learning_rate": 1.766649500111222e-05, + "loss": 0.5728, + "step": 1326 + }, + { + "epoch": 0.89, + "learning_rate": 1.7663092391969773e-05, + "loss": 0.5808, + "step": 1327 + }, + { + "epoch": 0.89, + "learning_rate": 1.765968763205515e-05, + "loss": 0.5567, + "step": 1328 + }, + { + "epoch": 0.89, + "learning_rate": 1.7656280722323945e-05, + "loss": 0.5205, + "step": 1329 + }, + { + "epoch": 0.9, + "learning_rate": 1.765287166373237e-05, + "loss": 0.5659, + "step": 1330 + }, + { + "epoch": 0.9, + "learning_rate": 1.7649460457237223e-05, + "loss": 0.5307, + "step": 1331 + }, + { + "epoch": 0.9, + "learning_rate": 1.7646047103795928e-05, + "loss": 0.5007, + "step": 1332 + }, + { + "epoch": 0.9, + "learning_rate": 1.7642631604366493e-05, + "loss": 0.5241, + "step": 1333 + }, + { + "epoch": 0.9, + "learning_rate": 1.7639213959907525e-05, + "loss": 0.5569, + "step": 1334 + }, + { + "epoch": 0.9, + "learning_rate": 1.7635794171378257e-05, + "loss": 0.519, + "step": 1335 + }, + { + "epoch": 0.9, + "learning_rate": 1.7632372239738497e-05, + "loss": 0.5643, + "step": 1336 + }, + { + "epoch": 0.9, + "learning_rate": 1.762894816594867e-05, + "loss": 0.5979, + "step": 1337 + }, + { + "epoch": 0.9, + "learning_rate": 1.7625521950969802e-05, + "loss": 0.5538, + "step": 1338 + }, + { + "epoch": 0.9, + "learning_rate": 1.762209359576351e-05, + "loss": 0.5651, + "step": 1339 + }, + { + "epoch": 0.9, + "learning_rate": 1.761866310129202e-05, + "loss": 0.5428, + "step": 1340 + }, + { + "epoch": 0.9, + "learning_rate": 1.7615230468518157e-05, + "loss": 0.5413, + "step": 1341 + }, + { + "epoch": 0.9, + "learning_rate": 1.761179569840535e-05, + "loss": 0.5716, + "step": 1342 + }, + { + "epoch": 0.9, + "learning_rate": 1.7608358791917615e-05, + "loss": 0.549, + "step": 1343 + }, + { + "epoch": 0.9, + "learning_rate": 1.760491975001958e-05, + "loss": 0.5682, + "step": 1344 + }, + { + "epoch": 0.91, + "learning_rate": 1.760147857367647e-05, + "loss": 0.5648, + "step": 1345 + }, + { + "epoch": 0.91, + "learning_rate": 1.7598035263854103e-05, + "loss": 0.5652, + "step": 1346 + }, + { + "epoch": 0.91, + "learning_rate": 1.7594589821518905e-05, + "loss": 0.5656, + "step": 1347 + }, + { + "epoch": 0.91, + "learning_rate": 1.7591142247637895e-05, + "loss": 0.5546, + "step": 1348 + }, + { + "epoch": 0.91, + "learning_rate": 1.7587692543178684e-05, + "loss": 0.5348, + "step": 1349 + }, + { + "epoch": 0.91, + "learning_rate": 1.7584240709109498e-05, + "loss": 0.549, + "step": 1350 + }, + { + "epoch": 0.91, + "learning_rate": 1.7580786746399146e-05, + "loss": 0.5672, + "step": 1351 + }, + { + "epoch": 0.91, + "learning_rate": 1.7577330656017037e-05, + "loss": 0.5469, + "step": 1352 + }, + { + "epoch": 0.91, + "learning_rate": 1.7573872438933183e-05, + "loss": 0.5214, + "step": 1353 + }, + { + "epoch": 0.91, + "learning_rate": 1.757041209611819e-05, + "loss": 0.5863, + "step": 1354 + }, + { + "epoch": 0.91, + "learning_rate": 1.7566949628543252e-05, + "loss": 0.5102, + "step": 1355 + }, + { + "epoch": 0.91, + "learning_rate": 1.7563485037180177e-05, + "loss": 0.5402, + "step": 1356 + }, + { + "epoch": 0.91, + "learning_rate": 1.7560018323001354e-05, + "loss": 0.5629, + "step": 1357 + }, + { + "epoch": 0.91, + "learning_rate": 1.7556549486979775e-05, + "loss": 0.5137, + "step": 1358 + }, + { + "epoch": 0.92, + "learning_rate": 1.7553078530089024e-05, + "loss": 0.5946, + "step": 1359 + }, + { + "epoch": 0.92, + "learning_rate": 1.754960545330328e-05, + "loss": 0.5536, + "step": 1360 + }, + { + "epoch": 0.92, + "learning_rate": 1.7546130257597322e-05, + "loss": 0.5257, + "step": 1361 + }, + { + "epoch": 0.92, + "learning_rate": 1.7542652943946523e-05, + "loss": 0.5616, + "step": 1362 + }, + { + "epoch": 0.92, + "learning_rate": 1.753917351332684e-05, + "loss": 0.5994, + "step": 1363 + }, + { + "epoch": 0.92, + "learning_rate": 1.753569196671484e-05, + "loss": 0.588, + "step": 1364 + }, + { + "epoch": 0.92, + "learning_rate": 1.753220830508767e-05, + "loss": 0.5721, + "step": 1365 + }, + { + "epoch": 0.92, + "learning_rate": 1.752872252942308e-05, + "loss": 0.5262, + "step": 1366 + }, + { + "epoch": 0.92, + "learning_rate": 1.752523464069941e-05, + "loss": 0.5495, + "step": 1367 + }, + { + "epoch": 0.92, + "learning_rate": 1.7521744639895593e-05, + "loss": 0.5531, + "step": 1368 + }, + { + "epoch": 0.92, + "learning_rate": 1.7518252527991154e-05, + "loss": 0.58, + "step": 1369 + }, + { + "epoch": 0.92, + "learning_rate": 1.7514758305966206e-05, + "loss": 0.5573, + "step": 1370 + }, + { + "epoch": 0.92, + "learning_rate": 1.751126197480147e-05, + "loss": 0.5405, + "step": 1371 + }, + { + "epoch": 0.92, + "learning_rate": 1.750776353547824e-05, + "loss": 0.5244, + "step": 1372 + }, + { + "epoch": 0.92, + "learning_rate": 1.7504262988978417e-05, + "loss": 0.5468, + "step": 1373 + }, + { + "epoch": 0.93, + "learning_rate": 1.750076033628448e-05, + "loss": 0.517, + "step": 1374 + }, + { + "epoch": 0.93, + "learning_rate": 1.7497255578379514e-05, + "loss": 0.559, + "step": 1375 + }, + { + "epoch": 0.93, + "learning_rate": 1.7493748716247174e-05, + "loss": 0.5307, + "step": 1376 + }, + { + "epoch": 0.93, + "learning_rate": 1.749023975087173e-05, + "loss": 0.5896, + "step": 1377 + }, + { + "epoch": 0.93, + "learning_rate": 1.7486728683238025e-05, + "loss": 0.5933, + "step": 1378 + }, + { + "epoch": 0.93, + "learning_rate": 1.74832155143315e-05, + "loss": 0.4987, + "step": 1379 + }, + { + "epoch": 0.93, + "learning_rate": 1.7479700245138184e-05, + "loss": 0.5837, + "step": 1380 + }, + { + "epoch": 0.93, + "learning_rate": 1.747618287664469e-05, + "loss": 0.5429, + "step": 1381 + }, + { + "epoch": 0.93, + "learning_rate": 1.7472663409838225e-05, + "loss": 0.5695, + "step": 1382 + }, + { + "epoch": 0.93, + "learning_rate": 1.746914184570659e-05, + "loss": 0.5263, + "step": 1383 + }, + { + "epoch": 0.93, + "learning_rate": 1.7465618185238167e-05, + "loss": 0.561, + "step": 1384 + }, + { + "epoch": 0.93, + "learning_rate": 1.746209242942193e-05, + "loss": 0.5063, + "step": 1385 + }, + { + "epoch": 0.93, + "learning_rate": 1.745856457924744e-05, + "loss": 0.5745, + "step": 1386 + }, + { + "epoch": 0.93, + "learning_rate": 1.7455034635704845e-05, + "loss": 0.5506, + "step": 1387 + }, + { + "epoch": 0.93, + "learning_rate": 1.745150259978488e-05, + "loss": 0.6118, + "step": 1388 + }, + { + "epoch": 0.94, + "learning_rate": 1.744796847247887e-05, + "loss": 0.5525, + "step": 1389 + }, + { + "epoch": 0.94, + "learning_rate": 1.7444432254778725e-05, + "loss": 0.5814, + "step": 1390 + }, + { + "epoch": 0.94, + "learning_rate": 1.7440893947676943e-05, + "loss": 0.5578, + "step": 1391 + }, + { + "epoch": 0.94, + "learning_rate": 1.743735355216661e-05, + "loss": 0.5782, + "step": 1392 + }, + { + "epoch": 0.94, + "learning_rate": 1.7433811069241392e-05, + "loss": 0.5566, + "step": 1393 + }, + { + "epoch": 0.94, + "learning_rate": 1.7430266499895547e-05, + "loss": 0.5485, + "step": 1394 + }, + { + "epoch": 0.94, + "learning_rate": 1.7426719845123914e-05, + "loss": 0.5494, + "step": 1395 + }, + { + "epoch": 0.94, + "learning_rate": 1.7423171105921924e-05, + "loss": 0.5581, + "step": 1396 + }, + { + "epoch": 0.94, + "learning_rate": 1.7419620283285585e-05, + "loss": 0.5435, + "step": 1397 + }, + { + "epoch": 0.94, + "learning_rate": 1.741606737821149e-05, + "loss": 0.5617, + "step": 1398 + }, + { + "epoch": 0.94, + "learning_rate": 1.741251239169683e-05, + "loss": 0.5174, + "step": 1399 + }, + { + "epoch": 0.94, + "learning_rate": 1.7408955324739363e-05, + "loss": 0.5197, + "step": 1400 + }, + { + "epoch": 0.94, + "learning_rate": 1.740539617833744e-05, + "loss": 0.5342, + "step": 1401 + }, + { + "epoch": 0.94, + "learning_rate": 1.7401834953489993e-05, + "loss": 0.5741, + "step": 1402 + }, + { + "epoch": 0.94, + "learning_rate": 1.7398271651196538e-05, + "loss": 0.5477, + "step": 1403 + }, + { + "epoch": 0.95, + "learning_rate": 1.739470627245717e-05, + "loss": 0.5801, + "step": 1404 + }, + { + "epoch": 0.95, + "learning_rate": 1.7391138818272578e-05, + "loss": 0.5585, + "step": 1405 + }, + { + "epoch": 0.95, + "learning_rate": 1.7387569289644025e-05, + "loss": 0.5288, + "step": 1406 + }, + { + "epoch": 0.95, + "learning_rate": 1.7383997687573354e-05, + "loss": 0.5547, + "step": 1407 + }, + { + "epoch": 0.95, + "learning_rate": 1.7380424013062996e-05, + "loss": 0.5832, + "step": 1408 + }, + { + "epoch": 0.95, + "learning_rate": 1.737684826711596e-05, + "loss": 0.5243, + "step": 1409 + }, + { + "epoch": 0.95, + "learning_rate": 1.737327045073584e-05, + "loss": 0.545, + "step": 1410 + }, + { + "epoch": 0.95, + "learning_rate": 1.7369690564926808e-05, + "loss": 0.5605, + "step": 1411 + }, + { + "epoch": 0.95, + "learning_rate": 1.736610861069361e-05, + "loss": 0.567, + "step": 1412 + }, + { + "epoch": 0.95, + "learning_rate": 1.7362524589041593e-05, + "loss": 0.5514, + "step": 1413 + }, + { + "epoch": 0.95, + "learning_rate": 1.7358938500976662e-05, + "loss": 0.5287, + "step": 1414 + }, + { + "epoch": 0.95, + "learning_rate": 1.7355350347505312e-05, + "loss": 0.5304, + "step": 1415 + }, + { + "epoch": 0.95, + "learning_rate": 1.735176012963462e-05, + "loss": 0.4766, + "step": 1416 + }, + { + "epoch": 0.95, + "learning_rate": 1.734816784837224e-05, + "loss": 0.5207, + "step": 1417 + }, + { + "epoch": 0.95, + "learning_rate": 1.7344573504726397e-05, + "loss": 0.5648, + "step": 1418 + }, + { + "epoch": 0.96, + "learning_rate": 1.7340977099705914e-05, + "loss": 0.5315, + "step": 1419 + }, + { + "epoch": 0.96, + "learning_rate": 1.7337378634320173e-05, + "loss": 0.5466, + "step": 1420 + }, + { + "epoch": 0.96, + "learning_rate": 1.7333778109579144e-05, + "loss": 0.5923, + "step": 1421 + }, + { + "epoch": 0.96, + "learning_rate": 1.7330175526493373e-05, + "loss": 0.5792, + "step": 1422 + }, + { + "epoch": 0.96, + "learning_rate": 1.7326570886073986e-05, + "loss": 0.5649, + "step": 1423 + }, + { + "epoch": 0.96, + "learning_rate": 1.7322964189332682e-05, + "loss": 0.5476, + "step": 1424 + }, + { + "epoch": 0.96, + "learning_rate": 1.7319355437281737e-05, + "loss": 0.5452, + "step": 1425 + }, + { + "epoch": 0.96, + "learning_rate": 1.7315744630934007e-05, + "loss": 0.5462, + "step": 1426 + }, + { + "epoch": 0.96, + "learning_rate": 1.731213177130293e-05, + "loss": 0.5276, + "step": 1427 + }, + { + "epoch": 0.96, + "learning_rate": 1.7308516859402507e-05, + "loss": 0.5519, + "step": 1428 + }, + { + "epoch": 0.96, + "learning_rate": 1.7304899896247328e-05, + "loss": 0.5934, + "step": 1429 + }, + { + "epoch": 0.96, + "learning_rate": 1.730128088285255e-05, + "loss": 0.5544, + "step": 1430 + }, + { + "epoch": 0.96, + "learning_rate": 1.7297659820233906e-05, + "loss": 0.5212, + "step": 1431 + }, + { + "epoch": 0.96, + "learning_rate": 1.7294036709407707e-05, + "loss": 0.533, + "step": 1432 + }, + { + "epoch": 0.96, + "learning_rate": 1.7290411551390845e-05, + "loss": 0.5498, + "step": 1433 + }, + { + "epoch": 0.97, + "learning_rate": 1.7286784347200768e-05, + "loss": 0.5629, + "step": 1434 + }, + { + "epoch": 0.97, + "learning_rate": 1.7283155097855525e-05, + "loss": 0.5244, + "step": 1435 + }, + { + "epoch": 0.97, + "learning_rate": 1.727952380437371e-05, + "loss": 0.5323, + "step": 1436 + }, + { + "epoch": 0.97, + "learning_rate": 1.7275890467774515e-05, + "loss": 0.5459, + "step": 1437 + }, + { + "epoch": 0.97, + "learning_rate": 1.727225508907769e-05, + "loss": 0.5407, + "step": 1438 + }, + { + "epoch": 0.97, + "learning_rate": 1.7268617669303564e-05, + "loss": 0.535, + "step": 1439 + }, + { + "epoch": 0.97, + "learning_rate": 1.7264978209473035e-05, + "loss": 0.5439, + "step": 1440 + }, + { + "epoch": 0.97, + "learning_rate": 1.7261336710607587e-05, + "loss": 0.5545, + "step": 1441 + }, + { + "epoch": 0.97, + "learning_rate": 1.7257693173729256e-05, + "loss": 0.5662, + "step": 1442 + }, + { + "epoch": 0.97, + "learning_rate": 1.725404759986067e-05, + "loss": 0.5379, + "step": 1443 + }, + { + "epoch": 0.97, + "learning_rate": 1.7250399990025005e-05, + "loss": 0.5472, + "step": 1444 + }, + { + "epoch": 0.97, + "learning_rate": 1.724675034524604e-05, + "loss": 0.5184, + "step": 1445 + }, + { + "epoch": 0.97, + "learning_rate": 1.7243098666548094e-05, + "loss": 0.517, + "step": 1446 + }, + { + "epoch": 0.97, + "learning_rate": 1.723944495495607e-05, + "loss": 0.5782, + "step": 1447 + }, + { + "epoch": 0.98, + "learning_rate": 1.7235789211495455e-05, + "loss": 0.5171, + "step": 1448 + }, + { + "epoch": 0.98, + "learning_rate": 1.723213143719228e-05, + "loss": 0.5429, + "step": 1449 + }, + { + "epoch": 0.98, + "learning_rate": 1.7228471633073164e-05, + "loss": 0.4996, + "step": 1450 + }, + { + "epoch": 0.98, + "learning_rate": 1.722480980016529e-05, + "loss": 0.523, + "step": 1451 + }, + { + "epoch": 0.98, + "learning_rate": 1.7221145939496416e-05, + "loss": 0.604, + "step": 1452 + }, + { + "epoch": 0.98, + "learning_rate": 1.7217480052094855e-05, + "loss": 0.5223, + "step": 1453 + }, + { + "epoch": 0.98, + "learning_rate": 1.7213812138989505e-05, + "loss": 0.5559, + "step": 1454 + }, + { + "epoch": 0.98, + "learning_rate": 1.7210142201209825e-05, + "loss": 0.5616, + "step": 1455 + }, + { + "epoch": 0.98, + "learning_rate": 1.7206470239785842e-05, + "loss": 0.5441, + "step": 1456 + }, + { + "epoch": 0.98, + "learning_rate": 1.7202796255748154e-05, + "loss": 0.5509, + "step": 1457 + }, + { + "epoch": 0.98, + "learning_rate": 1.7199120250127922e-05, + "loss": 0.5611, + "step": 1458 + }, + { + "epoch": 0.98, + "learning_rate": 1.7195442223956876e-05, + "loss": 0.5476, + "step": 1459 + }, + { + "epoch": 0.98, + "learning_rate": 1.719176217826732e-05, + "loss": 0.565, + "step": 1460 + }, + { + "epoch": 0.98, + "learning_rate": 1.718808011409211e-05, + "loss": 0.5307, + "step": 1461 + }, + { + "epoch": 0.98, + "learning_rate": 1.718439603246469e-05, + "loss": 0.5496, + "step": 1462 + }, + { + "epoch": 0.99, + "learning_rate": 1.7180709934419047e-05, + "loss": 0.559, + "step": 1463 + }, + { + "epoch": 0.99, + "learning_rate": 1.7177021820989746e-05, + "loss": 0.5849, + "step": 1464 + }, + { + "epoch": 0.99, + "learning_rate": 1.7173331693211922e-05, + "loss": 0.5817, + "step": 1465 + }, + { + "epoch": 0.99, + "learning_rate": 1.7169639552121268e-05, + "loss": 0.5533, + "step": 1466 + }, + { + "epoch": 0.99, + "learning_rate": 1.716594539875404e-05, + "loss": 0.5608, + "step": 1467 + }, + { + "epoch": 0.99, + "learning_rate": 1.7162249234147063e-05, + "loss": 0.5828, + "step": 1468 + }, + { + "epoch": 0.99, + "learning_rate": 1.715855105933773e-05, + "loss": 0.546, + "step": 1469 + }, + { + "epoch": 0.99, + "learning_rate": 1.7154850875363987e-05, + "loss": 0.5544, + "step": 1470 + }, + { + "epoch": 0.99, + "learning_rate": 1.7151148683264362e-05, + "loss": 0.598, + "step": 1471 + }, + { + "epoch": 0.99, + "learning_rate": 1.7147444484077928e-05, + "loss": 0.5401, + "step": 1472 + }, + { + "epoch": 0.99, + "learning_rate": 1.714373827884433e-05, + "loss": 0.5325, + "step": 1473 + }, + { + "epoch": 0.99, + "learning_rate": 1.7140030068603777e-05, + "loss": 0.5642, + "step": 1474 + }, + { + "epoch": 0.99, + "learning_rate": 1.7136319854397037e-05, + "loss": 0.5241, + "step": 1475 + }, + { + "epoch": 0.99, + "learning_rate": 1.7132607637265443e-05, + "loss": 0.5673, + "step": 1476 + }, + { + "epoch": 0.99, + "learning_rate": 1.7128893418250892e-05, + "loss": 0.5445, + "step": 1477 + }, + { + "epoch": 1.0, + "learning_rate": 1.7125177198395835e-05, + "loss": 0.5406, + "step": 1478 + }, + { + "epoch": 1.0, + "learning_rate": 1.712145897874329e-05, + "loss": 0.5535, + "step": 1479 + }, + { + "epoch": 1.0, + "learning_rate": 1.7117738760336846e-05, + "loss": 0.5634, + "step": 1480 + }, + { + "epoch": 1.0, + "learning_rate": 1.7114016544220633e-05, + "loss": 0.5694, + "step": 1481 + }, + { + "epoch": 1.0, + "learning_rate": 1.7110292331439353e-05, + "loss": 0.5582, + "step": 1482 + }, + { + "epoch": 1.0, + "learning_rate": 1.710656612303827e-05, + "loss": 0.5437, + "step": 1483 + }, + { + "epoch": 1.0, + "learning_rate": 1.7102837920063206e-05, + "loss": 0.5281, + "step": 1484 + }, + { + "epoch": 1.0, + "learning_rate": 1.7099107723560537e-05, + "loss": 0.5499, + "step": 1485 + }, + { + "epoch": 1.0, + "learning_rate": 1.7095375534577208e-05, + "loss": 0.5285, + "step": 1486 + }, + { + "epoch": 1.0, + "learning_rate": 1.709164135416072e-05, + "loss": 0.5201, + "step": 1487 + }, + { + "epoch": 1.0, + "learning_rate": 1.708790518335913e-05, + "loss": 0.5495, + "step": 1488 + }, + { + "epoch": 1.0, + "learning_rate": 1.7084167023221054e-05, + "loss": 0.5412, + "step": 1489 + }, + { + "epoch": 1.0, + "learning_rate": 1.7080426874795666e-05, + "loss": 0.5768, + "step": 1490 + }, + { + "epoch": 1.0, + "learning_rate": 1.7076684739132705e-05, + "loss": 0.507, + "step": 1491 + }, + { + "epoch": 1.0, + "learning_rate": 1.7072940617282463e-05, + "loss": 0.565, + "step": 1492 + }, + { + "epoch": 1.01, + "learning_rate": 1.7069194510295785e-05, + "loss": 0.5262, + "step": 1493 + }, + { + "epoch": 1.01, + "learning_rate": 1.7065446419224073e-05, + "loss": 0.5473, + "step": 1494 + }, + { + "epoch": 1.01, + "learning_rate": 1.7061696345119304e-05, + "loss": 0.5041, + "step": 1495 + }, + { + "epoch": 1.01, + "learning_rate": 1.7057944289033983e-05, + "loss": 0.5967, + "step": 1496 + }, + { + "epoch": 1.01, + "learning_rate": 1.7054190252021195e-05, + "loss": 0.5512, + "step": 1497 + }, + { + "epoch": 1.01, + "learning_rate": 1.705043423513457e-05, + "loss": 0.5361, + "step": 1498 + }, + { + "epoch": 1.01, + "learning_rate": 1.7046676239428287e-05, + "loss": 0.5605, + "step": 1499 + }, + { + "epoch": 1.01, + "learning_rate": 1.7042916265957107e-05, + "loss": 0.5367, + "step": 1500 + }, + { + "epoch": 1.01, + "learning_rate": 1.703915431577631e-05, + "loss": 0.5324, + "step": 1501 + }, + { + "epoch": 1.01, + "learning_rate": 1.7035390389941757e-05, + "loss": 0.5222, + "step": 1502 + }, + { + "epoch": 1.01, + "learning_rate": 1.7031624489509858e-05, + "loss": 0.5858, + "step": 1503 + }, + { + "epoch": 1.01, + "learning_rate": 1.702785661553757e-05, + "loss": 0.5545, + "step": 1504 + }, + { + "epoch": 1.01, + "learning_rate": 1.702408676908241e-05, + "loss": 0.5632, + "step": 1505 + }, + { + "epoch": 1.01, + "learning_rate": 1.7020314951202443e-05, + "loss": 0.5266, + "step": 1506 + }, + { + "epoch": 1.01, + "learning_rate": 1.7016541162956296e-05, + "loss": 0.5519, + "step": 1507 + }, + { + "epoch": 1.02, + "learning_rate": 1.701276540540315e-05, + "loss": 0.5277, + "step": 1508 + }, + { + "epoch": 1.02, + "learning_rate": 1.700898767960272e-05, + "loss": 0.5308, + "step": 1509 + }, + { + "epoch": 1.02, + "learning_rate": 1.7005207986615293e-05, + "loss": 0.5625, + "step": 1510 + }, + { + "epoch": 1.02, + "learning_rate": 1.7001426327501702e-05, + "loss": 0.5516, + "step": 1511 + }, + { + "epoch": 1.0, + "learning_rate": 1.699764270332333e-05, + "loss": 0.5446, + "step": 1512 + }, + { + "epoch": 1.0, + "learning_rate": 1.6993857115142117e-05, + "loss": 0.4884, + "step": 1513 + }, + { + "epoch": 1.0, + "learning_rate": 1.6990069564020548e-05, + "loss": 0.494, + "step": 1514 + }, + { + "epoch": 1.0, + "learning_rate": 1.698628005102166e-05, + "loss": 0.502, + "step": 1515 + }, + { + "epoch": 1.0, + "learning_rate": 1.698248857720904e-05, + "loss": 0.4559, + "step": 1516 + }, + { + "epoch": 1.0, + "learning_rate": 1.6978695143646827e-05, + "loss": 0.5284, + "step": 1517 + }, + { + "epoch": 1.0, + "learning_rate": 1.6974899751399722e-05, + "loss": 0.5137, + "step": 1518 + }, + { + "epoch": 1.0, + "learning_rate": 1.6971102401532946e-05, + "loss": 0.4768, + "step": 1519 + }, + { + "epoch": 1.01, + "learning_rate": 1.6967303095112297e-05, + "loss": 0.5221, + "step": 1520 + }, + { + "epoch": 1.01, + "learning_rate": 1.6963501833204112e-05, + "loss": 0.4772, + "step": 1521 + }, + { + "epoch": 1.01, + "learning_rate": 1.695969861687528e-05, + "loss": 0.4521, + "step": 1522 + }, + { + "epoch": 1.01, + "learning_rate": 1.6955893447193225e-05, + "loss": 0.4778, + "step": 1523 + }, + { + "epoch": 1.01, + "learning_rate": 1.6952086325225945e-05, + "loss": 0.5071, + "step": 1524 + }, + { + "epoch": 1.01, + "learning_rate": 1.6948277252041957e-05, + "loss": 0.4821, + "step": 1525 + }, + { + "epoch": 1.01, + "learning_rate": 1.694446622871035e-05, + "loss": 0.5019, + "step": 1526 + }, + { + "epoch": 1.01, + "learning_rate": 1.6940653256300745e-05, + "loss": 0.4926, + "step": 1527 + }, + { + "epoch": 1.01, + "learning_rate": 1.6936838335883316e-05, + "loss": 0.5114, + "step": 1528 + }, + { + "epoch": 1.01, + "learning_rate": 1.6933021468528786e-05, + "loss": 0.5159, + "step": 1529 + }, + { + "epoch": 1.01, + "learning_rate": 1.6929202655308414e-05, + "loss": 0.5111, + "step": 1530 + }, + { + "epoch": 1.01, + "learning_rate": 1.692538189729402e-05, + "loss": 0.5076, + "step": 1531 + }, + { + "epoch": 1.01, + "learning_rate": 1.6921559195557958e-05, + "loss": 0.479, + "step": 1532 + }, + { + "epoch": 1.01, + "learning_rate": 1.691773455117313e-05, + "loss": 0.5125, + "step": 1533 + }, + { + "epoch": 1.01, + "learning_rate": 1.691390796521299e-05, + "loss": 0.4801, + "step": 1534 + }, + { + "epoch": 1.02, + "learning_rate": 1.691007943875153e-05, + "loss": 0.4866, + "step": 1535 + }, + { + "epoch": 1.02, + "learning_rate": 1.6906248972863285e-05, + "loss": 0.4519, + "step": 1536 + }, + { + "epoch": 1.02, + "learning_rate": 1.6902416568623344e-05, + "loss": 0.4643, + "step": 1537 + }, + { + "epoch": 1.02, + "learning_rate": 1.6898582227107326e-05, + "loss": 0.5046, + "step": 1538 + }, + { + "epoch": 1.02, + "learning_rate": 1.689474594939141e-05, + "loss": 0.4605, + "step": 1539 + }, + { + "epoch": 1.02, + "learning_rate": 1.689090773655231e-05, + "loss": 0.5053, + "step": 1540 + }, + { + "epoch": 1.02, + "learning_rate": 1.6887067589667278e-05, + "loss": 0.5014, + "step": 1541 + }, + { + "epoch": 1.02, + "learning_rate": 1.688322550981411e-05, + "loss": 0.4573, + "step": 1542 + }, + { + "epoch": 1.02, + "learning_rate": 1.6879381498071163e-05, + "loss": 0.4963, + "step": 1543 + }, + { + "epoch": 1.02, + "learning_rate": 1.6875535555517302e-05, + "loss": 0.4797, + "step": 1544 + }, + { + "epoch": 1.02, + "learning_rate": 1.6871687683231975e-05, + "loss": 0.4845, + "step": 1545 + }, + { + "epoch": 1.02, + "learning_rate": 1.6867837882295135e-05, + "loss": 0.4384, + "step": 1546 + }, + { + "epoch": 1.02, + "learning_rate": 1.6863986153787297e-05, + "loss": 0.4952, + "step": 1547 + }, + { + "epoch": 1.02, + "learning_rate": 1.686013249878951e-05, + "loss": 0.475, + "step": 1548 + }, + { + "epoch": 1.03, + "learning_rate": 1.6856276918383368e-05, + "loss": 0.4936, + "step": 1549 + }, + { + "epoch": 1.03, + "learning_rate": 1.6852419413651003e-05, + "loss": 0.4954, + "step": 1550 + }, + { + "epoch": 1.03, + "learning_rate": 1.684855998567508e-05, + "loss": 0.4921, + "step": 1551 + }, + { + "epoch": 1.03, + "learning_rate": 1.684469863553882e-05, + "loss": 0.5014, + "step": 1552 + }, + { + "epoch": 1.03, + "learning_rate": 1.684083536432597e-05, + "loss": 0.4892, + "step": 1553 + }, + { + "epoch": 1.03, + "learning_rate": 1.6836970173120818e-05, + "loss": 0.4969, + "step": 1554 + }, + { + "epoch": 1.03, + "learning_rate": 1.6833103063008194e-05, + "loss": 0.4983, + "step": 1555 + }, + { + "epoch": 1.03, + "learning_rate": 1.6829234035073464e-05, + "loss": 0.5034, + "step": 1556 + }, + { + "epoch": 1.03, + "learning_rate": 1.682536309040254e-05, + "loss": 0.4834, + "step": 1557 + }, + { + "epoch": 1.03, + "learning_rate": 1.6821490230081864e-05, + "loss": 0.464, + "step": 1558 + }, + { + "epoch": 1.03, + "learning_rate": 1.6817615455198414e-05, + "loss": 0.4887, + "step": 1559 + }, + { + "epoch": 1.03, + "learning_rate": 1.681373876683971e-05, + "loss": 0.4895, + "step": 1560 + }, + { + "epoch": 1.03, + "learning_rate": 1.680986016609381e-05, + "loss": 0.4953, + "step": 1561 + }, + { + "epoch": 1.03, + "learning_rate": 1.6805979654049305e-05, + "loss": 0.4882, + "step": 1562 + }, + { + "epoch": 1.03, + "learning_rate": 1.6802097231795324e-05, + "loss": 0.5271, + "step": 1563 + }, + { + "epoch": 1.04, + "learning_rate": 1.6798212900421536e-05, + "loss": 0.4953, + "step": 1564 + }, + { + "epoch": 1.04, + "learning_rate": 1.6794326661018136e-05, + "loss": 0.4628, + "step": 1565 + }, + { + "epoch": 1.04, + "learning_rate": 1.6790438514675865e-05, + "loss": 0.4971, + "step": 1566 + }, + { + "epoch": 1.04, + "learning_rate": 1.6786548462485994e-05, + "loss": 0.4879, + "step": 1567 + }, + { + "epoch": 1.04, + "learning_rate": 1.678265650554033e-05, + "loss": 0.4589, + "step": 1568 + }, + { + "epoch": 1.04, + "learning_rate": 1.6778762644931213e-05, + "loss": 0.4851, + "step": 1569 + }, + { + "epoch": 1.04, + "learning_rate": 1.6774866881751518e-05, + "loss": 0.4966, + "step": 1570 + }, + { + "epoch": 1.04, + "learning_rate": 1.6770969217094657e-05, + "loss": 0.5129, + "step": 1571 + }, + { + "epoch": 1.04, + "learning_rate": 1.676706965205457e-05, + "loss": 0.5025, + "step": 1572 + }, + { + "epoch": 1.04, + "learning_rate": 1.6763168187725742e-05, + "loss": 0.5246, + "step": 1573 + }, + { + "epoch": 1.04, + "learning_rate": 1.6759264825203172e-05, + "loss": 0.4962, + "step": 1574 + }, + { + "epoch": 1.04, + "learning_rate": 1.6755359565582408e-05, + "loss": 0.4804, + "step": 1575 + }, + { + "epoch": 1.04, + "learning_rate": 1.6751452409959527e-05, + "loss": 0.4765, + "step": 1576 + }, + { + "epoch": 1.04, + "learning_rate": 1.674754335943113e-05, + "loss": 0.5119, + "step": 1577 + }, + { + "epoch": 1.04, + "learning_rate": 1.674363241509436e-05, + "loss": 0.5444, + "step": 1578 + }, + { + "epoch": 1.05, + "learning_rate": 1.6739719578046888e-05, + "loss": 0.4934, + "step": 1579 + }, + { + "epoch": 1.05, + "learning_rate": 1.6735804849386914e-05, + "loss": 0.5107, + "step": 1580 + }, + { + "epoch": 1.05, + "learning_rate": 1.6731888230213172e-05, + "loss": 0.5445, + "step": 1581 + }, + { + "epoch": 1.05, + "learning_rate": 1.6727969721624923e-05, + "loss": 0.5188, + "step": 1582 + }, + { + "epoch": 1.05, + "learning_rate": 1.672404932472196e-05, + "loss": 0.5017, + "step": 1583 + }, + { + "epoch": 1.05, + "learning_rate": 1.672012704060461e-05, + "loss": 0.5127, + "step": 1584 + }, + { + "epoch": 1.05, + "learning_rate": 1.6716202870373726e-05, + "loss": 0.4733, + "step": 1585 + }, + { + "epoch": 1.05, + "learning_rate": 1.6712276815130688e-05, + "loss": 0.5149, + "step": 1586 + }, + { + "epoch": 1.05, + "learning_rate": 1.670834887597741e-05, + "loss": 0.4659, + "step": 1587 + }, + { + "epoch": 1.05, + "learning_rate": 1.670441905401633e-05, + "loss": 0.4702, + "step": 1588 + }, + { + "epoch": 1.05, + "learning_rate": 1.6700487350350416e-05, + "loss": 0.4714, + "step": 1589 + }, + { + "epoch": 1.05, + "learning_rate": 1.6696553766083167e-05, + "loss": 0.48, + "step": 1590 + }, + { + "epoch": 1.05, + "learning_rate": 1.669261830231861e-05, + "loss": 0.4869, + "step": 1591 + }, + { + "epoch": 1.05, + "learning_rate": 1.6688680960161292e-05, + "loss": 0.4932, + "step": 1592 + }, + { + "epoch": 1.05, + "learning_rate": 1.66847417407163e-05, + "loss": 0.5002, + "step": 1593 + }, + { + "epoch": 1.06, + "learning_rate": 1.668080064508923e-05, + "loss": 0.4699, + "step": 1594 + }, + { + "epoch": 1.06, + "learning_rate": 1.667685767438622e-05, + "loss": 0.4734, + "step": 1595 + }, + { + "epoch": 1.06, + "learning_rate": 1.6672912829713936e-05, + "loss": 0.506, + "step": 1596 + }, + { + "epoch": 1.06, + "learning_rate": 1.666896611217955e-05, + "loss": 0.5154, + "step": 1597 + }, + { + "epoch": 1.06, + "learning_rate": 1.6665017522890786e-05, + "loss": 0.4628, + "step": 1598 + }, + { + "epoch": 1.06, + "learning_rate": 1.6661067062955868e-05, + "loss": 0.4881, + "step": 1599 + }, + { + "epoch": 1.06, + "learning_rate": 1.6657114733483564e-05, + "loss": 0.5222, + "step": 1600 + }, + { + "epoch": 1.06, + "learning_rate": 1.665316053558316e-05, + "loss": 0.4997, + "step": 1601 + }, + { + "epoch": 1.06, + "learning_rate": 1.6649204470364467e-05, + "loss": 0.4897, + "step": 1602 + }, + { + "epoch": 1.06, + "learning_rate": 1.6645246538937815e-05, + "loss": 0.4963, + "step": 1603 + }, + { + "epoch": 1.06, + "learning_rate": 1.6641286742414066e-05, + "loss": 0.4787, + "step": 1604 + }, + { + "epoch": 1.06, + "learning_rate": 1.6637325081904595e-05, + "loss": 0.4995, + "step": 1605 + }, + { + "epoch": 1.06, + "learning_rate": 1.663336155852132e-05, + "loss": 0.5118, + "step": 1606 + }, + { + "epoch": 1.06, + "learning_rate": 1.6629396173376656e-05, + "loss": 0.5022, + "step": 1607 + }, + { + "epoch": 1.06, + "learning_rate": 1.662542892758356e-05, + "loss": 0.5062, + "step": 1608 + }, + { + "epoch": 1.07, + "learning_rate": 1.66214598222555e-05, + "loss": 0.5097, + "step": 1609 + }, + { + "epoch": 1.07, + "learning_rate": 1.6617488858506478e-05, + "loss": 0.4886, + "step": 1610 + }, + { + "epoch": 1.07, + "learning_rate": 1.6613516037451e-05, + "loss": 0.4908, + "step": 1611 + }, + { + "epoch": 1.07, + "learning_rate": 1.6609541360204108e-05, + "loss": 0.4705, + "step": 1612 + }, + { + "epoch": 1.07, + "learning_rate": 1.660556482788136e-05, + "loss": 0.5641, + "step": 1613 + }, + { + "epoch": 1.07, + "learning_rate": 1.6601586441598834e-05, + "loss": 0.501, + "step": 1614 + }, + { + "epoch": 1.07, + "learning_rate": 1.659760620247313e-05, + "loss": 0.4935, + "step": 1615 + }, + { + "epoch": 1.07, + "learning_rate": 1.659362411162137e-05, + "loss": 0.5196, + "step": 1616 + }, + { + "epoch": 1.07, + "learning_rate": 1.6589640170161188e-05, + "loss": 0.4981, + "step": 1617 + }, + { + "epoch": 1.07, + "learning_rate": 1.6585654379210745e-05, + "loss": 0.5202, + "step": 1618 + }, + { + "epoch": 1.07, + "learning_rate": 1.6581666739888715e-05, + "loss": 0.4837, + "step": 1619 + }, + { + "epoch": 1.07, + "learning_rate": 1.65776772533143e-05, + "loss": 0.4857, + "step": 1620 + }, + { + "epoch": 1.07, + "learning_rate": 1.657368592060721e-05, + "loss": 0.4868, + "step": 1621 + }, + { + "epoch": 1.07, + "learning_rate": 1.656969274288768e-05, + "loss": 0.5207, + "step": 1622 + }, + { + "epoch": 1.07, + "learning_rate": 1.656569772127646e-05, + "loss": 0.5117, + "step": 1623 + }, + { + "epoch": 1.08, + "learning_rate": 1.6561700856894817e-05, + "loss": 0.5011, + "step": 1624 + }, + { + "epoch": 1.08, + "learning_rate": 1.6557702150864538e-05, + "loss": 0.4876, + "step": 1625 + }, + { + "epoch": 1.08, + "learning_rate": 1.6553701604307924e-05, + "loss": 0.5139, + "step": 1626 + }, + { + "epoch": 1.08, + "learning_rate": 1.6549699218347796e-05, + "loss": 0.4967, + "step": 1627 + }, + { + "epoch": 1.08, + "learning_rate": 1.654569499410749e-05, + "loss": 0.4945, + "step": 1628 + }, + { + "epoch": 1.08, + "learning_rate": 1.654168893271085e-05, + "loss": 0.498, + "step": 1629 + }, + { + "epoch": 1.08, + "learning_rate": 1.6537681035282247e-05, + "loss": 0.4923, + "step": 1630 + }, + { + "epoch": 1.08, + "learning_rate": 1.6533671302946566e-05, + "loss": 0.5004, + "step": 1631 + }, + { + "epoch": 1.08, + "learning_rate": 1.6529659736829197e-05, + "loss": 0.5208, + "step": 1632 + }, + { + "epoch": 1.08, + "learning_rate": 1.652564633805606e-05, + "loss": 0.4744, + "step": 1633 + }, + { + "epoch": 1.08, + "learning_rate": 1.6521631107753575e-05, + "loss": 0.5152, + "step": 1634 + }, + { + "epoch": 1.08, + "learning_rate": 1.6517614047048683e-05, + "loss": 0.5014, + "step": 1635 + }, + { + "epoch": 1.08, + "learning_rate": 1.6513595157068837e-05, + "loss": 0.4904, + "step": 1636 + }, + { + "epoch": 1.08, + "learning_rate": 1.650957443894201e-05, + "loss": 0.4434, + "step": 1637 + }, + { + "epoch": 1.09, + "learning_rate": 1.6505551893796673e-05, + "loss": 0.481, + "step": 1638 + }, + { + "epoch": 1.09, + "learning_rate": 1.6501527522761828e-05, + "loss": 0.497, + "step": 1639 + }, + { + "epoch": 1.09, + "learning_rate": 1.6497501326966974e-05, + "loss": 0.5048, + "step": 1640 + }, + { + "epoch": 1.09, + "learning_rate": 1.6493473307542132e-05, + "loss": 0.509, + "step": 1641 + }, + { + "epoch": 1.09, + "learning_rate": 1.6489443465617832e-05, + "loss": 0.4771, + "step": 1642 + }, + { + "epoch": 1.09, + "learning_rate": 1.648541180232511e-05, + "loss": 0.5047, + "step": 1643 + }, + { + "epoch": 1.09, + "learning_rate": 1.6481378318795528e-05, + "loss": 0.4742, + "step": 1644 + }, + { + "epoch": 1.09, + "learning_rate": 1.6477343016161138e-05, + "loss": 0.4916, + "step": 1645 + }, + { + "epoch": 1.09, + "learning_rate": 1.6473305895554522e-05, + "loss": 0.4914, + "step": 1646 + }, + { + "epoch": 1.09, + "learning_rate": 1.6469266958108757e-05, + "loss": 0.4415, + "step": 1647 + }, + { + "epoch": 1.09, + "learning_rate": 1.6465226204957444e-05, + "loss": 0.4514, + "step": 1648 + }, + { + "epoch": 1.09, + "learning_rate": 1.646118363723468e-05, + "loss": 0.5003, + "step": 1649 + }, + { + "epoch": 1.09, + "learning_rate": 1.6457139256075084e-05, + "loss": 0.4971, + "step": 1650 + }, + { + "epoch": 1.09, + "learning_rate": 1.6453093062613774e-05, + "loss": 0.4783, + "step": 1651 + }, + { + "epoch": 1.09, + "learning_rate": 1.6449045057986376e-05, + "loss": 0.4858, + "step": 1652 + }, + { + "epoch": 1.1, + "learning_rate": 1.644499524332904e-05, + "loss": 0.5081, + "step": 1653 + }, + { + "epoch": 1.1, + "learning_rate": 1.6440943619778403e-05, + "loss": 0.5069, + "step": 1654 + }, + { + "epoch": 1.1, + "learning_rate": 1.6436890188471622e-05, + "loss": 0.5034, + "step": 1655 + }, + { + "epoch": 1.1, + "learning_rate": 1.643283495054636e-05, + "loss": 0.4961, + "step": 1656 + }, + { + "epoch": 1.1, + "learning_rate": 1.642877790714078e-05, + "loss": 0.4751, + "step": 1657 + }, + { + "epoch": 1.1, + "learning_rate": 1.642471905939357e-05, + "loss": 0.4604, + "step": 1658 + }, + { + "epoch": 1.1, + "learning_rate": 1.6420658408443904e-05, + "loss": 0.4928, + "step": 1659 + }, + { + "epoch": 1.1, + "learning_rate": 1.6416595955431468e-05, + "loss": 0.4956, + "step": 1660 + }, + { + "epoch": 1.1, + "learning_rate": 1.641253170149646e-05, + "loss": 0.5046, + "step": 1661 + }, + { + "epoch": 1.1, + "learning_rate": 1.6408465647779578e-05, + "loss": 0.4758, + "step": 1662 + }, + { + "epoch": 1.1, + "learning_rate": 1.6404397795422024e-05, + "loss": 0.4873, + "step": 1663 + }, + { + "epoch": 1.1, + "learning_rate": 1.640032814556551e-05, + "loss": 0.4867, + "step": 1664 + }, + { + "epoch": 1.1, + "learning_rate": 1.6396256699352252e-05, + "loss": 0.4854, + "step": 1665 + }, + { + "epoch": 1.1, + "learning_rate": 1.6392183457924967e-05, + "loss": 0.4952, + "step": 1666 + }, + { + "epoch": 1.1, + "learning_rate": 1.6388108422426873e-05, + "loss": 0.4483, + "step": 1667 + }, + { + "epoch": 1.11, + "learning_rate": 1.6384031594001698e-05, + "loss": 0.4868, + "step": 1668 + }, + { + "epoch": 1.11, + "learning_rate": 1.637995297379367e-05, + "loss": 0.5239, + "step": 1669 + }, + { + "epoch": 1.11, + "learning_rate": 1.6375872562947516e-05, + "loss": 0.5117, + "step": 1670 + }, + { + "epoch": 1.11, + "learning_rate": 1.637179036260848e-05, + "loss": 0.5197, + "step": 1671 + }, + { + "epoch": 1.11, + "learning_rate": 1.636770637392229e-05, + "loss": 0.4781, + "step": 1672 + }, + { + "epoch": 1.11, + "learning_rate": 1.636362059803519e-05, + "loss": 0.4938, + "step": 1673 + }, + { + "epoch": 1.11, + "learning_rate": 1.6359533036093915e-05, + "loss": 0.5139, + "step": 1674 + }, + { + "epoch": 1.11, + "learning_rate": 1.635544368924571e-05, + "loss": 0.5368, + "step": 1675 + }, + { + "epoch": 1.11, + "learning_rate": 1.6351352558638313e-05, + "loss": 0.5025, + "step": 1676 + }, + { + "epoch": 1.11, + "learning_rate": 1.6347259645419966e-05, + "loss": 0.4668, + "step": 1677 + }, + { + "epoch": 1.11, + "learning_rate": 1.6343164950739417e-05, + "loss": 0.4899, + "step": 1678 + }, + { + "epoch": 1.11, + "learning_rate": 1.633906847574591e-05, + "loss": 0.5279, + "step": 1679 + }, + { + "epoch": 1.11, + "learning_rate": 1.6334970221589182e-05, + "loss": 0.4785, + "step": 1680 + }, + { + "epoch": 1.11, + "learning_rate": 1.6330870189419475e-05, + "loss": 0.5061, + "step": 1681 + }, + { + "epoch": 1.11, + "learning_rate": 1.6326768380387538e-05, + "loss": 0.4853, + "step": 1682 + }, + { + "epoch": 1.12, + "learning_rate": 1.6322664795644604e-05, + "loss": 0.495, + "step": 1683 + }, + { + "epoch": 1.12, + "learning_rate": 1.631855943634241e-05, + "loss": 0.4349, + "step": 1684 + }, + { + "epoch": 1.12, + "learning_rate": 1.6314452303633193e-05, + "loss": 0.5487, + "step": 1685 + }, + { + "epoch": 1.12, + "learning_rate": 1.6310343398669693e-05, + "loss": 0.4936, + "step": 1686 + }, + { + "epoch": 1.12, + "learning_rate": 1.6306232722605133e-05, + "loss": 0.4722, + "step": 1687 + }, + { + "epoch": 1.12, + "learning_rate": 1.6302120276593248e-05, + "loss": 0.5042, + "step": 1688 + }, + { + "epoch": 1.12, + "learning_rate": 1.6298006061788264e-05, + "loss": 0.5306, + "step": 1689 + }, + { + "epoch": 1.12, + "learning_rate": 1.6293890079344892e-05, + "loss": 0.5045, + "step": 1690 + }, + { + "epoch": 1.12, + "learning_rate": 1.6289772330418365e-05, + "loss": 0.4998, + "step": 1691 + }, + { + "epoch": 1.12, + "learning_rate": 1.6285652816164384e-05, + "loss": 0.5169, + "step": 1692 + }, + { + "epoch": 1.12, + "learning_rate": 1.6281531537739163e-05, + "loss": 0.5023, + "step": 1693 + }, + { + "epoch": 1.12, + "learning_rate": 1.6277408496299406e-05, + "loss": 0.4869, + "step": 1694 + }, + { + "epoch": 1.12, + "learning_rate": 1.6273283693002312e-05, + "loss": 0.5068, + "step": 1695 + }, + { + "epoch": 1.12, + "learning_rate": 1.6269157129005573e-05, + "loss": 0.4735, + "step": 1696 + }, + { + "epoch": 1.12, + "learning_rate": 1.626502880546738e-05, + "loss": 0.48, + "step": 1697 + }, + { + "epoch": 1.13, + "learning_rate": 1.6260898723546416e-05, + "loss": 0.5318, + "step": 1698 + }, + { + "epoch": 1.13, + "learning_rate": 1.625676688440185e-05, + "loss": 0.5035, + "step": 1699 + }, + { + "epoch": 1.13, + "learning_rate": 1.625263328919335e-05, + "loss": 0.4679, + "step": 1700 + }, + { + "epoch": 1.13, + "learning_rate": 1.6248497939081082e-05, + "loss": 0.5333, + "step": 1701 + }, + { + "epoch": 1.13, + "learning_rate": 1.6244360835225698e-05, + "loss": 0.4487, + "step": 1702 + }, + { + "epoch": 1.13, + "learning_rate": 1.624022197878834e-05, + "loss": 0.4921, + "step": 1703 + }, + { + "epoch": 1.13, + "learning_rate": 1.623608137093065e-05, + "loss": 0.4767, + "step": 1704 + }, + { + "epoch": 1.13, + "learning_rate": 1.6231939012814758e-05, + "loss": 0.4884, + "step": 1705 + }, + { + "epoch": 1.13, + "learning_rate": 1.622779490560328e-05, + "loss": 0.4936, + "step": 1706 + }, + { + "epoch": 1.13, + "learning_rate": 1.6223649050459337e-05, + "loss": 0.5619, + "step": 1707 + }, + { + "epoch": 1.13, + "learning_rate": 1.621950144854652e-05, + "loss": 0.5088, + "step": 1708 + }, + { + "epoch": 1.13, + "learning_rate": 1.6215352101028926e-05, + "loss": 0.5222, + "step": 1709 + }, + { + "epoch": 1.13, + "learning_rate": 1.6211201009071134e-05, + "loss": 0.4876, + "step": 1710 + }, + { + "epoch": 1.13, + "learning_rate": 1.6207048173838226e-05, + "loss": 0.4759, + "step": 1711 + }, + { + "epoch": 1.13, + "learning_rate": 1.620289359649575e-05, + "loss": 0.5471, + "step": 1712 + }, + { + "epoch": 1.14, + "learning_rate": 1.6198737278209763e-05, + "loss": 0.4909, + "step": 1713 + }, + { + "epoch": 1.14, + "learning_rate": 1.6194579220146806e-05, + "loss": 0.5141, + "step": 1714 + }, + { + "epoch": 1.14, + "learning_rate": 1.6190419423473897e-05, + "loss": 0.5057, + "step": 1715 + }, + { + "epoch": 1.14, + "learning_rate": 1.6186257889358557e-05, + "loss": 0.5086, + "step": 1716 + }, + { + "epoch": 1.14, + "learning_rate": 1.6182094618968793e-05, + "loss": 0.4666, + "step": 1717 + }, + { + "epoch": 1.14, + "learning_rate": 1.6177929613473088e-05, + "loss": 0.5046, + "step": 1718 + }, + { + "epoch": 1.14, + "learning_rate": 1.617376287404042e-05, + "loss": 0.4704, + "step": 1719 + }, + { + "epoch": 1.14, + "learning_rate": 1.6169594401840255e-05, + "loss": 0.4911, + "step": 1720 + }, + { + "epoch": 1.14, + "learning_rate": 1.6165424198042542e-05, + "loss": 0.4914, + "step": 1721 + }, + { + "epoch": 1.14, + "learning_rate": 1.6161252263817715e-05, + "loss": 0.4775, + "step": 1722 + }, + { + "epoch": 1.14, + "learning_rate": 1.6157078600336693e-05, + "loss": 0.4704, + "step": 1723 + }, + { + "epoch": 1.14, + "learning_rate": 1.6152903208770888e-05, + "loss": 0.4702, + "step": 1724 + }, + { + "epoch": 1.14, + "learning_rate": 1.6148726090292196e-05, + "loss": 0.5048, + "step": 1725 + }, + { + "epoch": 1.14, + "learning_rate": 1.6144547246072984e-05, + "loss": 0.4896, + "step": 1726 + }, + { + "epoch": 1.14, + "learning_rate": 1.614036667728612e-05, + "loss": 0.506, + "step": 1727 + }, + { + "epoch": 1.15, + "learning_rate": 1.6136184385104945e-05, + "loss": 0.4839, + "step": 1728 + }, + { + "epoch": 1.15, + "learning_rate": 1.6132000370703286e-05, + "loss": 0.4753, + "step": 1729 + }, + { + "epoch": 1.15, + "learning_rate": 1.6127814635255462e-05, + "loss": 0.4943, + "step": 1730 + }, + { + "epoch": 1.15, + "learning_rate": 1.6123627179936262e-05, + "loss": 0.5303, + "step": 1731 + }, + { + "epoch": 1.15, + "learning_rate": 1.6119438005920968e-05, + "loss": 0.4707, + "step": 1732 + }, + { + "epoch": 1.15, + "learning_rate": 1.611524711438533e-05, + "loss": 0.5114, + "step": 1733 + }, + { + "epoch": 1.15, + "learning_rate": 1.6111054506505607e-05, + "loss": 0.4873, + "step": 1734 + }, + { + "epoch": 1.15, + "learning_rate": 1.6106860183458514e-05, + "loss": 0.4573, + "step": 1735 + }, + { + "epoch": 1.15, + "learning_rate": 1.6102664146421255e-05, + "loss": 0.4946, + "step": 1736 + }, + { + "epoch": 1.15, + "learning_rate": 1.6098466396571514e-05, + "loss": 0.5046, + "step": 1737 + }, + { + "epoch": 1.15, + "learning_rate": 1.6094266935087467e-05, + "loss": 0.5017, + "step": 1738 + }, + { + "epoch": 1.15, + "learning_rate": 1.6090065763147755e-05, + "loss": 0.5371, + "step": 1739 + }, + { + "epoch": 1.15, + "learning_rate": 1.608586288193151e-05, + "loss": 0.4745, + "step": 1740 + }, + { + "epoch": 1.15, + "learning_rate": 1.608165829261833e-05, + "loss": 0.4906, + "step": 1741 + }, + { + "epoch": 1.16, + "learning_rate": 1.6077451996388314e-05, + "loss": 0.4589, + "step": 1742 + }, + { + "epoch": 1.16, + "learning_rate": 1.6073243994422018e-05, + "loss": 0.4641, + "step": 1743 + }, + { + "epoch": 1.16, + "learning_rate": 1.6069034287900493e-05, + "loss": 0.4957, + "step": 1744 + }, + { + "epoch": 1.16, + "learning_rate": 1.6064822878005262e-05, + "loss": 0.479, + "step": 1745 + }, + { + "epoch": 1.16, + "learning_rate": 1.606060976591832e-05, + "loss": 0.5074, + "step": 1746 + }, + { + "epoch": 1.16, + "learning_rate": 1.605639495282215e-05, + "loss": 0.4891, + "step": 1747 + }, + { + "epoch": 1.16, + "learning_rate": 1.6052178439899712e-05, + "loss": 0.488, + "step": 1748 + }, + { + "epoch": 1.16, + "learning_rate": 1.6047960228334428e-05, + "loss": 0.4882, + "step": 1749 + }, + { + "epoch": 1.16, + "learning_rate": 1.6043740319310218e-05, + "loss": 0.4828, + "step": 1750 + }, + { + "epoch": 1.16, + "learning_rate": 1.6039518714011465e-05, + "loss": 0.4835, + "step": 1751 + }, + { + "epoch": 1.16, + "learning_rate": 1.6035295413623032e-05, + "loss": 0.5265, + "step": 1752 + }, + { + "epoch": 1.16, + "learning_rate": 1.6031070419330258e-05, + "loss": 0.4545, + "step": 1753 + }, + { + "epoch": 1.16, + "learning_rate": 1.6026843732318958e-05, + "loss": 0.4948, + "step": 1754 + }, + { + "epoch": 1.16, + "learning_rate": 1.602261535377542e-05, + "loss": 0.5852, + "step": 1755 + }, + { + "epoch": 1.16, + "learning_rate": 1.60183852848864e-05, + "loss": 0.4922, + "step": 1756 + }, + { + "epoch": 1.17, + "learning_rate": 1.601415352683915e-05, + "loss": 0.5126, + "step": 1757 + }, + { + "epoch": 1.17, + "learning_rate": 1.6009920080821365e-05, + "loss": 0.4729, + "step": 1758 + }, + { + "epoch": 1.17, + "learning_rate": 1.6005684948021248e-05, + "loss": 0.5047, + "step": 1759 + }, + { + "epoch": 1.17, + "learning_rate": 1.600144812962745e-05, + "loss": 0.4839, + "step": 1760 + }, + { + "epoch": 1.17, + "learning_rate": 1.5997209626829105e-05, + "loss": 0.4733, + "step": 1761 + }, + { + "epoch": 1.17, + "learning_rate": 1.5992969440815813e-05, + "loss": 0.4462, + "step": 1762 + }, + { + "epoch": 1.17, + "learning_rate": 1.598872757277766e-05, + "loss": 0.4996, + "step": 1763 + }, + { + "epoch": 1.17, + "learning_rate": 1.5984484023905186e-05, + "loss": 0.4953, + "step": 1764 + }, + { + "epoch": 1.17, + "learning_rate": 1.5980238795389424e-05, + "loss": 0.4991, + "step": 1765 + }, + { + "epoch": 1.17, + "learning_rate": 1.5975991888421857e-05, + "loss": 0.4656, + "step": 1766 + }, + { + "epoch": 1.17, + "learning_rate": 1.597174330419445e-05, + "loss": 0.4827, + "step": 1767 + }, + { + "epoch": 1.17, + "learning_rate": 1.5967493043899644e-05, + "loss": 0.5212, + "step": 1768 + }, + { + "epoch": 1.17, + "learning_rate": 1.5963241108730342e-05, + "loss": 0.5176, + "step": 1769 + }, + { + "epoch": 1.17, + "learning_rate": 1.595898749987991e-05, + "loss": 0.478, + "step": 1770 + }, + { + "epoch": 1.17, + "learning_rate": 1.5954732218542207e-05, + "loss": 0.4943, + "step": 1771 + }, + { + "epoch": 1.18, + "learning_rate": 1.5950475265911537e-05, + "loss": 0.5017, + "step": 1772 + }, + { + "epoch": 1.18, + "learning_rate": 1.5946216643182685e-05, + "loss": 0.5064, + "step": 1773 + }, + { + "epoch": 1.18, + "learning_rate": 1.5941956351550908e-05, + "loss": 0.4653, + "step": 1774 + }, + { + "epoch": 1.18, + "learning_rate": 1.5937694392211923e-05, + "loss": 0.5459, + "step": 1775 + }, + { + "epoch": 1.18, + "learning_rate": 1.593343076636192e-05, + "loss": 0.5163, + "step": 1776 + }, + { + "epoch": 1.18, + "learning_rate": 1.5929165475197553e-05, + "loss": 0.4803, + "step": 1777 + }, + { + "epoch": 1.18, + "learning_rate": 1.5924898519915947e-05, + "loss": 0.4616, + "step": 1778 + }, + { + "epoch": 1.18, + "learning_rate": 1.592062990171469e-05, + "loss": 0.5039, + "step": 1779 + }, + { + "epoch": 1.18, + "learning_rate": 1.5916359621791847e-05, + "loss": 0.4811, + "step": 1780 + }, + { + "epoch": 1.18, + "learning_rate": 1.5912087681345934e-05, + "loss": 0.488, + "step": 1781 + }, + { + "epoch": 1.18, + "learning_rate": 1.5907814081575943e-05, + "loss": 0.4915, + "step": 1782 + }, + { + "epoch": 1.18, + "learning_rate": 1.590353882368133e-05, + "loss": 0.4763, + "step": 1783 + }, + { + "epoch": 1.18, + "learning_rate": 1.589926190886202e-05, + "loss": 0.5249, + "step": 1784 + }, + { + "epoch": 1.18, + "learning_rate": 1.5894983338318396e-05, + "loss": 0.4959, + "step": 1785 + }, + { + "epoch": 1.18, + "learning_rate": 1.5890703113251305e-05, + "loss": 0.4974, + "step": 1786 + }, + { + "epoch": 1.19, + "learning_rate": 1.5886421234862072e-05, + "loss": 0.4823, + "step": 1787 + }, + { + "epoch": 1.19, + "learning_rate": 1.5882137704352466e-05, + "loss": 0.5042, + "step": 1788 + }, + { + "epoch": 1.19, + "learning_rate": 1.5877852522924733e-05, + "loss": 0.5092, + "step": 1789 + }, + { + "epoch": 1.19, + "learning_rate": 1.587356569178158e-05, + "loss": 0.5036, + "step": 1790 + }, + { + "epoch": 1.19, + "learning_rate": 1.586927721212618e-05, + "loss": 0.4688, + "step": 1791 + }, + { + "epoch": 1.19, + "learning_rate": 1.5864987085162155e-05, + "loss": 0.5075, + "step": 1792 + }, + { + "epoch": 1.19, + "learning_rate": 1.5860695312093608e-05, + "loss": 0.4574, + "step": 1793 + }, + { + "epoch": 1.19, + "learning_rate": 1.5856401894125095e-05, + "loss": 0.49, + "step": 1794 + }, + { + "epoch": 1.19, + "learning_rate": 1.585210683246163e-05, + "loss": 0.4857, + "step": 1795 + }, + { + "epoch": 1.19, + "learning_rate": 1.5847810128308695e-05, + "loss": 0.5273, + "step": 1796 + }, + { + "epoch": 1.19, + "learning_rate": 1.5843511782872226e-05, + "loss": 0.4779, + "step": 1797 + }, + { + "epoch": 1.19, + "learning_rate": 1.583921179735863e-05, + "loss": 0.5096, + "step": 1798 + }, + { + "epoch": 1.19, + "learning_rate": 1.5834910172974767e-05, + "loss": 0.5149, + "step": 1799 + }, + { + "epoch": 1.19, + "learning_rate": 1.5830606910927956e-05, + "loss": 0.5018, + "step": 1800 + }, + { + "epoch": 1.19, + "learning_rate": 1.5826302012425977e-05, + "loss": 0.5022, + "step": 1801 + }, + { + "epoch": 1.2, + "learning_rate": 1.582199547867707e-05, + "loss": 0.4903, + "step": 1802 + }, + { + "epoch": 1.2, + "learning_rate": 1.581768731088994e-05, + "loss": 0.4917, + "step": 1803 + }, + { + "epoch": 1.2, + "learning_rate": 1.581337751027374e-05, + "loss": 0.4503, + "step": 1804 + }, + { + "epoch": 1.2, + "learning_rate": 1.5809066078038082e-05, + "loss": 0.4935, + "step": 1805 + }, + { + "epoch": 1.2, + "learning_rate": 1.5804753015393045e-05, + "loss": 0.4633, + "step": 1806 + }, + { + "epoch": 1.2, + "learning_rate": 1.5800438323549167e-05, + "loss": 0.4962, + "step": 1807 + }, + { + "epoch": 1.2, + "learning_rate": 1.5796122003717424e-05, + "loss": 0.4695, + "step": 1808 + }, + { + "epoch": 1.2, + "learning_rate": 1.5791804057109266e-05, + "loss": 0.4752, + "step": 1809 + }, + { + "epoch": 1.2, + "learning_rate": 1.57874844849366e-05, + "loss": 0.5203, + "step": 1810 + }, + { + "epoch": 1.2, + "learning_rate": 1.578316328841178e-05, + "loss": 0.5127, + "step": 1811 + }, + { + "epoch": 1.2, + "learning_rate": 1.5778840468747628e-05, + "loss": 0.4468, + "step": 1812 + }, + { + "epoch": 1.2, + "learning_rate": 1.57745160271574e-05, + "loss": 0.4872, + "step": 1813 + }, + { + "epoch": 1.2, + "learning_rate": 1.5770189964854834e-05, + "loss": 0.4886, + "step": 1814 + }, + { + "epoch": 1.2, + "learning_rate": 1.5765862283054105e-05, + "loss": 0.5162, + "step": 1815 + }, + { + "epoch": 1.2, + "learning_rate": 1.576153298296985e-05, + "loss": 0.4946, + "step": 1816 + }, + { + "epoch": 1.21, + "learning_rate": 1.575720206581715e-05, + "loss": 0.5295, + "step": 1817 + }, + { + "epoch": 1.21, + "learning_rate": 1.5752869532811555e-05, + "loss": 0.513, + "step": 1818 + }, + { + "epoch": 1.21, + "learning_rate": 1.5748535385169062e-05, + "loss": 0.5128, + "step": 1819 + }, + { + "epoch": 1.21, + "learning_rate": 1.5744199624106115e-05, + "loss": 0.482, + "step": 1820 + }, + { + "epoch": 1.21, + "learning_rate": 1.5739862250839623e-05, + "loss": 0.4437, + "step": 1821 + }, + { + "epoch": 1.21, + "learning_rate": 1.5735523266586935e-05, + "loss": 0.5026, + "step": 1822 + }, + { + "epoch": 1.21, + "learning_rate": 1.5731182672565865e-05, + "loss": 0.4651, + "step": 1823 + }, + { + "epoch": 1.21, + "learning_rate": 1.5726840469994658e-05, + "loss": 0.5238, + "step": 1824 + }, + { + "epoch": 1.21, + "learning_rate": 1.572249666009204e-05, + "loss": 0.4972, + "step": 1825 + }, + { + "epoch": 1.21, + "learning_rate": 1.5718151244077162e-05, + "loss": 0.4722, + "step": 1826 + }, + { + "epoch": 1.21, + "learning_rate": 1.571380422316964e-05, + "loss": 0.5043, + "step": 1827 + }, + { + "epoch": 1.21, + "learning_rate": 1.5709455598589537e-05, + "loss": 0.436, + "step": 1828 + }, + { + "epoch": 1.21, + "learning_rate": 1.5705105371557362e-05, + "loss": 0.4849, + "step": 1829 + }, + { + "epoch": 1.21, + "learning_rate": 1.570075354329408e-05, + "loss": 0.4757, + "step": 1830 + }, + { + "epoch": 1.21, + "learning_rate": 1.5696400115021102e-05, + "loss": 0.4852, + "step": 1831 + }, + { + "epoch": 1.22, + "learning_rate": 1.5692045087960294e-05, + "loss": 0.4869, + "step": 1832 + }, + { + "epoch": 1.22, + "learning_rate": 1.5687688463333954e-05, + "loss": 0.5459, + "step": 1833 + }, + { + "epoch": 1.22, + "learning_rate": 1.568333024236485e-05, + "loss": 0.4779, + "step": 1834 + }, + { + "epoch": 1.22, + "learning_rate": 1.5678970426276186e-05, + "loss": 0.508, + "step": 1835 + }, + { + "epoch": 1.22, + "learning_rate": 1.5674609016291613e-05, + "loss": 0.4839, + "step": 1836 + }, + { + "epoch": 1.22, + "learning_rate": 1.5670246013635232e-05, + "loss": 0.4917, + "step": 1837 + }, + { + "epoch": 1.22, + "learning_rate": 1.5665881419531593e-05, + "loss": 0.5003, + "step": 1838 + }, + { + "epoch": 1.22, + "learning_rate": 1.566151523520569e-05, + "loss": 0.4802, + "step": 1839 + }, + { + "epoch": 1.22, + "learning_rate": 1.5657147461882965e-05, + "loss": 0.49, + "step": 1840 + }, + { + "epoch": 1.22, + "learning_rate": 1.5652778100789304e-05, + "loss": 0.4841, + "step": 1841 + }, + { + "epoch": 1.22, + "learning_rate": 1.564840715315104e-05, + "loss": 0.4745, + "step": 1842 + }, + { + "epoch": 1.22, + "learning_rate": 1.5644034620194953e-05, + "loss": 0.5214, + "step": 1843 + }, + { + "epoch": 1.22, + "learning_rate": 1.563966050314826e-05, + "loss": 0.4715, + "step": 1844 + }, + { + "epoch": 1.22, + "learning_rate": 1.5635284803238632e-05, + "loss": 0.4653, + "step": 1845 + }, + { + "epoch": 1.23, + "learning_rate": 1.5630907521694184e-05, + "loss": 0.4968, + "step": 1846 + }, + { + "epoch": 1.23, + "learning_rate": 1.5626528659743466e-05, + "loss": 0.507, + "step": 1847 + }, + { + "epoch": 1.23, + "learning_rate": 1.562214821861548e-05, + "loss": 0.4981, + "step": 1848 + }, + { + "epoch": 1.23, + "learning_rate": 1.561776619953967e-05, + "loss": 0.4915, + "step": 1849 + }, + { + "epoch": 1.23, + "learning_rate": 1.5613382603745918e-05, + "loss": 0.5093, + "step": 1850 + }, + { + "epoch": 1.23, + "learning_rate": 1.560899743246455e-05, + "loss": 0.49, + "step": 1851 + }, + { + "epoch": 1.23, + "learning_rate": 1.5604610686926346e-05, + "loss": 0.4908, + "step": 1852 + }, + { + "epoch": 1.23, + "learning_rate": 1.5600222368362506e-05, + "loss": 0.5194, + "step": 1853 + }, + { + "epoch": 1.23, + "learning_rate": 1.5595832478004685e-05, + "loss": 0.4776, + "step": 1854 + }, + { + "epoch": 1.23, + "learning_rate": 1.559144101708499e-05, + "loss": 0.4756, + "step": 1855 + }, + { + "epoch": 1.23, + "learning_rate": 1.5587047986835942e-05, + "loss": 0.4733, + "step": 1856 + }, + { + "epoch": 1.23, + "learning_rate": 1.558265338849052e-05, + "loss": 0.5164, + "step": 1857 + }, + { + "epoch": 1.23, + "learning_rate": 1.5578257223282146e-05, + "loss": 0.5059, + "step": 1858 + }, + { + "epoch": 1.23, + "learning_rate": 1.5573859492444672e-05, + "loss": 0.4619, + "step": 1859 + }, + { + "epoch": 1.23, + "learning_rate": 1.556946019721239e-05, + "loss": 0.4917, + "step": 1860 + }, + { + "epoch": 1.24, + "learning_rate": 1.556505933882004e-05, + "loss": 0.5133, + "step": 1861 + }, + { + "epoch": 1.24, + "learning_rate": 1.5560656918502787e-05, + "loss": 0.5143, + "step": 1862 + }, + { + "epoch": 1.24, + "learning_rate": 1.555625293749625e-05, + "loss": 0.4908, + "step": 1863 + }, + { + "epoch": 1.24, + "learning_rate": 1.5551847397036476e-05, + "loss": 0.465, + "step": 1864 + }, + { + "epoch": 1.24, + "learning_rate": 1.5547440298359948e-05, + "loss": 0.5334, + "step": 1865 + }, + { + "epoch": 1.24, + "learning_rate": 1.5543031642703594e-05, + "loss": 0.495, + "step": 1866 + }, + { + "epoch": 1.24, + "learning_rate": 1.553862143130478e-05, + "loss": 0.4917, + "step": 1867 + }, + { + "epoch": 1.24, + "learning_rate": 1.553420966540129e-05, + "loss": 0.5022, + "step": 1868 + }, + { + "epoch": 1.24, + "learning_rate": 1.5529796346231376e-05, + "loss": 0.4532, + "step": 1869 + }, + { + "epoch": 1.24, + "learning_rate": 1.5525381475033692e-05, + "loss": 0.4638, + "step": 1870 + }, + { + "epoch": 1.24, + "learning_rate": 1.5520965053047353e-05, + "loss": 0.5026, + "step": 1871 + }, + { + "epoch": 1.24, + "learning_rate": 1.55165470815119e-05, + "loss": 0.4906, + "step": 1872 + }, + { + "epoch": 1.24, + "learning_rate": 1.5512127561667304e-05, + "loss": 0.4698, + "step": 1873 + }, + { + "epoch": 1.24, + "learning_rate": 1.550770649475398e-05, + "loss": 0.4757, + "step": 1874 + }, + { + "epoch": 1.24, + "learning_rate": 1.550328388201277e-05, + "loss": 0.4805, + "step": 1875 + }, + { + "epoch": 1.25, + "learning_rate": 1.5498859724684953e-05, + "loss": 0.4983, + "step": 1876 + }, + { + "epoch": 1.25, + "learning_rate": 1.5494434024012247e-05, + "loss": 0.5818, + "step": 1877 + }, + { + "epoch": 1.25, + "learning_rate": 1.5490006781236785e-05, + "loss": 0.4802, + "step": 1878 + }, + { + "epoch": 1.25, + "learning_rate": 1.5485577997601158e-05, + "loss": 0.4864, + "step": 1879 + }, + { + "epoch": 1.25, + "learning_rate": 1.5481147674348366e-05, + "loss": 0.4753, + "step": 1880 + }, + { + "epoch": 1.25, + "learning_rate": 1.547671581272186e-05, + "loss": 0.5381, + "step": 1881 + }, + { + "epoch": 1.25, + "learning_rate": 1.5472282413965508e-05, + "loss": 0.5472, + "step": 1882 + }, + { + "epoch": 1.25, + "learning_rate": 1.5467847479323622e-05, + "loss": 0.5016, + "step": 1883 + }, + { + "epoch": 1.25, + "learning_rate": 1.546341101004093e-05, + "loss": 0.4962, + "step": 1884 + }, + { + "epoch": 1.25, + "learning_rate": 1.545897300736261e-05, + "loss": 0.514, + "step": 1885 + }, + { + "epoch": 1.25, + "learning_rate": 1.5454533472534253e-05, + "loss": 0.5045, + "step": 1886 + }, + { + "epoch": 1.25, + "learning_rate": 1.5450092406801892e-05, + "loss": 0.464, + "step": 1887 + }, + { + "epoch": 1.25, + "learning_rate": 1.544564981141198e-05, + "loss": 0.4967, + "step": 1888 + }, + { + "epoch": 1.25, + "learning_rate": 1.5441205687611403e-05, + "loss": 0.5189, + "step": 1889 + }, + { + "epoch": 1.25, + "learning_rate": 1.5436760036647485e-05, + "loss": 0.4891, + "step": 1890 + }, + { + "epoch": 1.26, + "learning_rate": 1.5432312859767963e-05, + "loss": 0.483, + "step": 1891 + }, + { + "epoch": 1.26, + "learning_rate": 1.5427864158221015e-05, + "loss": 0.5056, + "step": 1892 + }, + { + "epoch": 1.26, + "learning_rate": 1.5423413933255237e-05, + "loss": 0.4923, + "step": 1893 + }, + { + "epoch": 1.26, + "learning_rate": 1.541896218611966e-05, + "loss": 0.497, + "step": 1894 + }, + { + "epoch": 1.26, + "learning_rate": 1.541450891806374e-05, + "loss": 0.5341, + "step": 1895 + }, + { + "epoch": 1.26, + "learning_rate": 1.5410054130337358e-05, + "loss": 0.5075, + "step": 1896 + }, + { + "epoch": 1.26, + "learning_rate": 1.5405597824190822e-05, + "loss": 0.5031, + "step": 1897 + }, + { + "epoch": 1.26, + "learning_rate": 1.5401140000874873e-05, + "loss": 0.5042, + "step": 1898 + }, + { + "epoch": 1.26, + "learning_rate": 1.5396680661640667e-05, + "loss": 0.4972, + "step": 1899 + }, + { + "epoch": 1.26, + "learning_rate": 1.539221980773979e-05, + "loss": 0.487, + "step": 1900 + }, + { + "epoch": 1.26, + "learning_rate": 1.538775744042426e-05, + "loss": 0.4908, + "step": 1901 + }, + { + "epoch": 1.26, + "learning_rate": 1.5383293560946505e-05, + "loss": 0.5037, + "step": 1902 + }, + { + "epoch": 1.26, + "learning_rate": 1.5378828170559387e-05, + "loss": 0.5053, + "step": 1903 + }, + { + "epoch": 1.26, + "learning_rate": 1.5374361270516197e-05, + "loss": 0.467, + "step": 1904 + }, + { + "epoch": 1.26, + "learning_rate": 1.5369892862070636e-05, + "loss": 0.5163, + "step": 1905 + }, + { + "epoch": 1.27, + "learning_rate": 1.5365422946476842e-05, + "loss": 0.4656, + "step": 1906 + }, + { + "epoch": 1.27, + "learning_rate": 1.5360951524989367e-05, + "loss": 0.4932, + "step": 1907 + }, + { + "epoch": 1.27, + "learning_rate": 1.5356478598863187e-05, + "loss": 0.4975, + "step": 1908 + }, + { + "epoch": 1.27, + "learning_rate": 1.5352004169353706e-05, + "loss": 0.4963, + "step": 1909 + }, + { + "epoch": 1.27, + "learning_rate": 1.5347528237716742e-05, + "loss": 0.453, + "step": 1910 + }, + { + "epoch": 1.27, + "learning_rate": 1.5343050805208543e-05, + "loss": 0.4866, + "step": 1911 + }, + { + "epoch": 1.27, + "learning_rate": 1.533857187308577e-05, + "loss": 0.4702, + "step": 1912 + }, + { + "epoch": 1.27, + "learning_rate": 1.533409144260551e-05, + "loss": 0.5095, + "step": 1913 + }, + { + "epoch": 1.27, + "learning_rate": 1.5329609515025262e-05, + "loss": 0.4992, + "step": 1914 + }, + { + "epoch": 1.27, + "learning_rate": 1.5325126091602965e-05, + "loss": 0.4683, + "step": 1915 + }, + { + "epoch": 1.27, + "learning_rate": 1.532064117359696e-05, + "loss": 0.4832, + "step": 1916 + }, + { + "epoch": 1.27, + "learning_rate": 1.5316154762266008e-05, + "loss": 0.4995, + "step": 1917 + }, + { + "epoch": 1.27, + "learning_rate": 1.5311666858869296e-05, + "loss": 0.4624, + "step": 1918 + }, + { + "epoch": 1.27, + "learning_rate": 1.530717746466643e-05, + "loss": 0.4787, + "step": 1919 + }, + { + "epoch": 1.27, + "learning_rate": 1.5302686580917428e-05, + "loss": 0.4613, + "step": 1920 + }, + { + "epoch": 1.28, + "learning_rate": 1.5298194208882735e-05, + "loss": 0.4764, + "step": 1921 + }, + { + "epoch": 1.28, + "learning_rate": 1.5293700349823203e-05, + "loss": 0.4643, + "step": 1922 + }, + { + "epoch": 1.28, + "learning_rate": 1.5289205005000113e-05, + "loss": 0.4731, + "step": 1923 + }, + { + "epoch": 1.28, + "learning_rate": 1.5284708175675153e-05, + "loss": 0.485, + "step": 1924 + }, + { + "epoch": 1.28, + "learning_rate": 1.528020986311043e-05, + "loss": 0.4789, + "step": 1925 + }, + { + "epoch": 1.28, + "learning_rate": 1.5275710068568477e-05, + "loss": 0.4775, + "step": 1926 + }, + { + "epoch": 1.28, + "learning_rate": 1.5271208793312226e-05, + "loss": 0.4853, + "step": 1927 + }, + { + "epoch": 1.28, + "learning_rate": 1.5266706038605038e-05, + "loss": 0.5434, + "step": 1928 + }, + { + "epoch": 1.28, + "learning_rate": 1.5262201805710683e-05, + "loss": 0.5327, + "step": 1929 + }, + { + "epoch": 1.28, + "learning_rate": 1.525769609589335e-05, + "loss": 0.4669, + "step": 1930 + }, + { + "epoch": 1.28, + "learning_rate": 1.5253188910417636e-05, + "loss": 0.466, + "step": 1931 + }, + { + "epoch": 1.28, + "learning_rate": 1.5248680250548558e-05, + "loss": 0.4469, + "step": 1932 + }, + { + "epoch": 1.28, + "learning_rate": 1.524417011755155e-05, + "loss": 0.5053, + "step": 1933 + }, + { + "epoch": 1.28, + "learning_rate": 1.5239658512692447e-05, + "loss": 0.4796, + "step": 1934 + }, + { + "epoch": 1.28, + "learning_rate": 1.523514543723751e-05, + "loss": 0.4903, + "step": 1935 + }, + { + "epoch": 1.29, + "learning_rate": 1.5230630892453407e-05, + "loss": 0.4798, + "step": 1936 + }, + { + "epoch": 1.29, + "learning_rate": 1.5226114879607215e-05, + "loss": 0.4407, + "step": 1937 + }, + { + "epoch": 1.29, + "learning_rate": 1.522159739996643e-05, + "loss": 0.5244, + "step": 1938 + }, + { + "epoch": 1.29, + "learning_rate": 1.5217078454798952e-05, + "loss": 0.4863, + "step": 1939 + }, + { + "epoch": 1.29, + "learning_rate": 1.5212558045373106e-05, + "loss": 0.473, + "step": 1940 + }, + { + "epoch": 1.29, + "learning_rate": 1.5208036172957612e-05, + "loss": 0.5057, + "step": 1941 + }, + { + "epoch": 1.29, + "learning_rate": 1.5203512838821609e-05, + "loss": 0.4693, + "step": 1942 + }, + { + "epoch": 1.29, + "learning_rate": 1.5198988044234644e-05, + "loss": 0.503, + "step": 1943 + }, + { + "epoch": 1.29, + "learning_rate": 1.5194461790466674e-05, + "loss": 0.502, + "step": 1944 + }, + { + "epoch": 1.29, + "learning_rate": 1.5189934078788069e-05, + "loss": 0.493, + "step": 1945 + }, + { + "epoch": 1.29, + "learning_rate": 1.5185404910469604e-05, + "loss": 0.4808, + "step": 1946 + }, + { + "epoch": 1.29, + "learning_rate": 1.5180874286782464e-05, + "loss": 0.4626, + "step": 1947 + }, + { + "epoch": 1.29, + "learning_rate": 1.517634220899824e-05, + "loss": 0.4917, + "step": 1948 + }, + { + "epoch": 1.29, + "learning_rate": 1.5171808678388934e-05, + "loss": 0.4725, + "step": 1949 + }, + { + "epoch": 1.3, + "learning_rate": 1.5167273696226965e-05, + "loss": 0.4932, + "step": 1950 + }, + { + "epoch": 1.3, + "learning_rate": 1.516273726378514e-05, + "loss": 0.4965, + "step": 1951 + }, + { + "epoch": 1.3, + "learning_rate": 1.5158199382336678e-05, + "loss": 0.5235, + "step": 1952 + }, + { + "epoch": 1.3, + "learning_rate": 1.5153660053155227e-05, + "loss": 0.5067, + "step": 1953 + }, + { + "epoch": 1.3, + "learning_rate": 1.5149119277514808e-05, + "loss": 0.4946, + "step": 1954 + }, + { + "epoch": 1.3, + "learning_rate": 1.5144577056689872e-05, + "loss": 0.5235, + "step": 1955 + }, + { + "epoch": 1.3, + "learning_rate": 1.5140033391955266e-05, + "loss": 0.4944, + "step": 1956 + }, + { + "epoch": 1.3, + "learning_rate": 1.5135488284586241e-05, + "loss": 0.4987, + "step": 1957 + }, + { + "epoch": 1.3, + "learning_rate": 1.513094173585846e-05, + "loss": 0.5327, + "step": 1958 + }, + { + "epoch": 1.3, + "learning_rate": 1.5126393747047983e-05, + "loss": 0.5004, + "step": 1959 + }, + { + "epoch": 1.3, + "learning_rate": 1.512184431943128e-05, + "loss": 0.4803, + "step": 1960 + }, + { + "epoch": 1.3, + "learning_rate": 1.5117293454285217e-05, + "loss": 0.5076, + "step": 1961 + }, + { + "epoch": 1.3, + "learning_rate": 1.5112741152887078e-05, + "loss": 0.5194, + "step": 1962 + }, + { + "epoch": 1.3, + "learning_rate": 1.5108187416514533e-05, + "loss": 0.4837, + "step": 1963 + }, + { + "epoch": 1.3, + "learning_rate": 1.5103632246445666e-05, + "loss": 0.4671, + "step": 1964 + }, + { + "epoch": 1.31, + "learning_rate": 1.5099075643958959e-05, + "loss": 0.5293, + "step": 1965 + }, + { + "epoch": 1.31, + "learning_rate": 1.5094517610333294e-05, + "loss": 0.5148, + "step": 1966 + }, + { + "epoch": 1.31, + "learning_rate": 1.5089958146847965e-05, + "loss": 0.5091, + "step": 1967 + }, + { + "epoch": 1.31, + "learning_rate": 1.5085397254782655e-05, + "loss": 0.4895, + "step": 1968 + }, + { + "epoch": 1.31, + "learning_rate": 1.508083493541745e-05, + "loss": 0.5222, + "step": 1969 + }, + { + "epoch": 1.31, + "learning_rate": 1.5076271190032845e-05, + "loss": 0.488, + "step": 1970 + }, + { + "epoch": 1.31, + "learning_rate": 1.507170601990973e-05, + "loss": 0.492, + "step": 1971 + }, + { + "epoch": 1.31, + "learning_rate": 1.5067139426329389e-05, + "loss": 0.466, + "step": 1972 + }, + { + "epoch": 1.31, + "learning_rate": 1.5062571410573515e-05, + "loss": 0.5013, + "step": 1973 + }, + { + "epoch": 1.31, + "learning_rate": 1.5058001973924197e-05, + "loss": 0.5407, + "step": 1974 + }, + { + "epoch": 1.31, + "learning_rate": 1.5053431117663922e-05, + "loss": 0.5054, + "step": 1975 + }, + { + "epoch": 1.31, + "learning_rate": 1.5048858843075573e-05, + "loss": 0.4888, + "step": 1976 + }, + { + "epoch": 1.31, + "learning_rate": 1.5044285151442437e-05, + "loss": 0.4669, + "step": 1977 + }, + { + "epoch": 1.31, + "learning_rate": 1.5039710044048192e-05, + "loss": 0.5075, + "step": 1978 + }, + { + "epoch": 1.31, + "learning_rate": 1.5035133522176916e-05, + "loss": 0.5005, + "step": 1979 + }, + { + "epoch": 1.32, + "learning_rate": 1.5030555587113091e-05, + "loss": 0.5604, + "step": 1980 + }, + { + "epoch": 1.32, + "learning_rate": 1.5025976240141585e-05, + "loss": 0.5057, + "step": 1981 + }, + { + "epoch": 1.32, + "learning_rate": 1.5021395482547665e-05, + "loss": 0.494, + "step": 1982 + }, + { + "epoch": 1.32, + "learning_rate": 1.5016813315616998e-05, + "loss": 0.4726, + "step": 1983 + }, + { + "epoch": 1.32, + "learning_rate": 1.5012229740635644e-05, + "loss": 0.4818, + "step": 1984 + }, + { + "epoch": 1.32, + "learning_rate": 1.5007644758890059e-05, + "loss": 0.4719, + "step": 1985 + }, + { + "epoch": 1.32, + "learning_rate": 1.5003058371667087e-05, + "loss": 0.52, + "step": 1986 + }, + { + "epoch": 1.32, + "learning_rate": 1.4998470580253981e-05, + "loss": 0.4593, + "step": 1987 + }, + { + "epoch": 1.32, + "learning_rate": 1.4993881385938376e-05, + "loss": 0.5065, + "step": 1988 + }, + { + "epoch": 1.32, + "learning_rate": 1.4989290790008304e-05, + "loss": 0.4921, + "step": 1989 + }, + { + "epoch": 1.32, + "learning_rate": 1.4984698793752193e-05, + "loss": 0.5069, + "step": 1990 + }, + { + "epoch": 1.32, + "learning_rate": 1.498010539845886e-05, + "loss": 0.4687, + "step": 1991 + }, + { + "epoch": 1.32, + "learning_rate": 1.4975510605417514e-05, + "loss": 0.4661, + "step": 1992 + }, + { + "epoch": 1.32, + "learning_rate": 1.4970914415917764e-05, + "loss": 0.5055, + "step": 1993 + }, + { + "epoch": 1.32, + "learning_rate": 1.4966316831249601e-05, + "loss": 0.4628, + "step": 1994 + }, + { + "epoch": 1.33, + "learning_rate": 1.4961717852703417e-05, + "loss": 0.5247, + "step": 1995 + }, + { + "epoch": 1.33, + "learning_rate": 1.4957117481569987e-05, + "loss": 0.4945, + "step": 1996 + }, + { + "epoch": 1.33, + "learning_rate": 1.4952515719140482e-05, + "loss": 0.5304, + "step": 1997 + }, + { + "epoch": 1.33, + "learning_rate": 1.4947912566706459e-05, + "loss": 0.5036, + "step": 1998 + }, + { + "epoch": 1.33, + "learning_rate": 1.4943308025559871e-05, + "loss": 0.488, + "step": 1999 + }, + { + "epoch": 1.33, + "learning_rate": 1.4938702096993057e-05, + "loss": 0.4976, + "step": 2000 + }, + { + "epoch": 1.33, + "learning_rate": 1.4934094782298747e-05, + "loss": 0.4887, + "step": 2001 + }, + { + "epoch": 1.33, + "learning_rate": 1.4929486082770059e-05, + "loss": 0.5295, + "step": 2002 + }, + { + "epoch": 1.33, + "learning_rate": 1.4924875999700499e-05, + "loss": 0.4886, + "step": 2003 + }, + { + "epoch": 1.33, + "learning_rate": 1.4920264534383962e-05, + "loss": 0.4646, + "step": 2004 + }, + { + "epoch": 1.33, + "learning_rate": 1.4915651688114733e-05, + "loss": 0.5137, + "step": 2005 + }, + { + "epoch": 1.33, + "learning_rate": 1.491103746218748e-05, + "loss": 0.4779, + "step": 2006 + }, + { + "epoch": 1.33, + "learning_rate": 1.490642185789726e-05, + "loss": 0.4943, + "step": 2007 + }, + { + "epoch": 1.33, + "learning_rate": 1.4901804876539522e-05, + "loss": 0.52, + "step": 2008 + }, + { + "epoch": 1.33, + "learning_rate": 1.4897186519410095e-05, + "loss": 0.4914, + "step": 2009 + }, + { + "epoch": 1.34, + "learning_rate": 1.48925667878052e-05, + "loss": 0.4862, + "step": 2010 + }, + { + "epoch": 1.34, + "learning_rate": 1.4887945683021436e-05, + "loss": 0.5101, + "step": 2011 + }, + { + "epoch": 1.34, + "learning_rate": 1.4883323206355791e-05, + "loss": 0.4578, + "step": 2012 + }, + { + "epoch": 1.34, + "learning_rate": 1.4878699359105641e-05, + "loss": 0.4595, + "step": 2013 + }, + { + "epoch": 1.34, + "learning_rate": 1.4874074142568741e-05, + "loss": 0.5145, + "step": 2014 + }, + { + "epoch": 1.34, + "learning_rate": 1.486944755804324e-05, + "loss": 0.4861, + "step": 2015 + }, + { + "epoch": 1.34, + "learning_rate": 1.4864819606827664e-05, + "loss": 0.5072, + "step": 2016 + }, + { + "epoch": 1.34, + "learning_rate": 1.4860190290220913e-05, + "loss": 0.5119, + "step": 2017 + }, + { + "epoch": 1.34, + "learning_rate": 1.4855559609522292e-05, + "loss": 0.4695, + "step": 2018 + }, + { + "epoch": 1.34, + "learning_rate": 1.4850927566031472e-05, + "loss": 0.4962, + "step": 2019 + }, + { + "epoch": 1.34, + "learning_rate": 1.484629416104851e-05, + "loss": 0.4925, + "step": 2020 + }, + { + "epoch": 1.34, + "learning_rate": 1.4841659395873852e-05, + "loss": 0.4892, + "step": 2021 + }, + { + "epoch": 1.34, + "learning_rate": 1.4837023271808317e-05, + "loss": 0.5147, + "step": 2022 + }, + { + "epoch": 1.34, + "learning_rate": 1.483238579015311e-05, + "loss": 0.4766, + "step": 2023 + }, + { + "epoch": 1.34, + "learning_rate": 1.4827746952209816e-05, + "loss": 0.5117, + "step": 2024 + }, + { + "epoch": 1.35, + "learning_rate": 1.4823106759280404e-05, + "loss": 0.4925, + "step": 2025 + }, + { + "epoch": 1.35, + "learning_rate": 1.4818465212667213e-05, + "loss": 0.5164, + "step": 2026 + }, + { + "epoch": 1.35, + "learning_rate": 1.4813822313672974e-05, + "loss": 0.4803, + "step": 2027 + }, + { + "epoch": 1.35, + "learning_rate": 1.480917806360079e-05, + "loss": 0.4733, + "step": 2028 + }, + { + "epoch": 1.35, + "learning_rate": 1.4804532463754148e-05, + "loss": 0.4636, + "step": 2029 + }, + { + "epoch": 1.35, + "learning_rate": 1.4799885515436912e-05, + "loss": 0.4766, + "step": 2030 + }, + { + "epoch": 1.35, + "learning_rate": 1.4795237219953323e-05, + "loss": 0.497, + "step": 2031 + }, + { + "epoch": 1.35, + "learning_rate": 1.4790587578607998e-05, + "loss": 0.5008, + "step": 2032 + }, + { + "epoch": 1.35, + "learning_rate": 1.4785936592705938e-05, + "loss": 0.4939, + "step": 2033 + }, + { + "epoch": 1.35, + "learning_rate": 1.478128426355252e-05, + "loss": 0.5068, + "step": 2034 + }, + { + "epoch": 1.35, + "learning_rate": 1.4776630592453492e-05, + "loss": 0.4975, + "step": 2035 + }, + { + "epoch": 1.35, + "learning_rate": 1.4771975580714986e-05, + "loss": 0.5264, + "step": 2036 + }, + { + "epoch": 1.35, + "learning_rate": 1.4767319229643506e-05, + "loss": 0.4815, + "step": 2037 + }, + { + "epoch": 1.35, + "learning_rate": 1.4762661540545932e-05, + "loss": 0.528, + "step": 2038 + }, + { + "epoch": 1.36, + "learning_rate": 1.4758002514729524e-05, + "loss": 0.4937, + "step": 2039 + }, + { + "epoch": 1.36, + "learning_rate": 1.4753342153501913e-05, + "loss": 0.48, + "step": 2040 + }, + { + "epoch": 1.36, + "learning_rate": 1.4748680458171099e-05, + "loss": 0.4883, + "step": 2041 + }, + { + "epoch": 1.36, + "learning_rate": 1.4744017430045473e-05, + "loss": 0.4962, + "step": 2042 + }, + { + "epoch": 1.36, + "learning_rate": 1.4739353070433784e-05, + "loss": 0.5208, + "step": 2043 + }, + { + "epoch": 1.36, + "learning_rate": 1.473468738064516e-05, + "loss": 0.4529, + "step": 2044 + }, + { + "epoch": 1.36, + "learning_rate": 1.4730020361989108e-05, + "loss": 0.5103, + "step": 2045 + }, + { + "epoch": 1.36, + "learning_rate": 1.47253520157755e-05, + "loss": 0.5091, + "step": 2046 + }, + { + "epoch": 1.36, + "learning_rate": 1.472068234331458e-05, + "loss": 0.5402, + "step": 2047 + }, + { + "epoch": 1.36, + "learning_rate": 1.4716011345916976e-05, + "loss": 0.5004, + "step": 2048 + }, + { + "epoch": 1.36, + "learning_rate": 1.4711339024893674e-05, + "loss": 0.4977, + "step": 2049 + }, + { + "epoch": 1.36, + "learning_rate": 1.470666538155604e-05, + "loss": 0.4738, + "step": 2050 + }, + { + "epoch": 1.36, + "learning_rate": 1.4701990417215807e-05, + "loss": 0.4804, + "step": 2051 + }, + { + "epoch": 1.36, + "learning_rate": 1.4697314133185083e-05, + "loss": 0.4805, + "step": 2052 + }, + { + "epoch": 1.36, + "learning_rate": 1.4692636530776336e-05, + "loss": 0.5, + "step": 2053 + }, + { + "epoch": 1.37, + "learning_rate": 1.468795761130242e-05, + "loss": 0.5291, + "step": 2054 + }, + { + "epoch": 1.37, + "learning_rate": 1.4683277376076548e-05, + "loss": 0.5034, + "step": 2055 + }, + { + "epoch": 1.37, + "learning_rate": 1.4678595826412303e-05, + "loss": 0.4834, + "step": 2056 + }, + { + "epoch": 1.37, + "learning_rate": 1.4673912963623637e-05, + "loss": 0.5055, + "step": 2057 + }, + { + "epoch": 1.37, + "learning_rate": 1.4669228789024877e-05, + "loss": 0.4725, + "step": 2058 + }, + { + "epoch": 1.37, + "learning_rate": 1.466454330393071e-05, + "loss": 0.4926, + "step": 2059 + }, + { + "epoch": 1.37, + "learning_rate": 1.4659856509656194e-05, + "loss": 0.5579, + "step": 2060 + }, + { + "epoch": 1.37, + "learning_rate": 1.4655168407516754e-05, + "loss": 0.5123, + "step": 2061 + }, + { + "epoch": 1.37, + "learning_rate": 1.465047899882818e-05, + "loss": 0.4895, + "step": 2062 + }, + { + "epoch": 1.37, + "learning_rate": 1.4645788284906639e-05, + "loss": 0.5141, + "step": 2063 + }, + { + "epoch": 1.37, + "learning_rate": 1.464109626706865e-05, + "loss": 0.4875, + "step": 2064 + }, + { + "epoch": 1.37, + "learning_rate": 1.4636402946631108e-05, + "loss": 0.5015, + "step": 2065 + }, + { + "epoch": 1.37, + "learning_rate": 1.4631708324911269e-05, + "loss": 0.4835, + "step": 2066 + }, + { + "epoch": 1.37, + "learning_rate": 1.4627012403226752e-05, + "loss": 0.5068, + "step": 2067 + }, + { + "epoch": 1.37, + "learning_rate": 1.462231518289555e-05, + "loss": 0.5033, + "step": 2068 + }, + { + "epoch": 1.38, + "learning_rate": 1.461761666523601e-05, + "loss": 0.5302, + "step": 2069 + }, + { + "epoch": 1.38, + "learning_rate": 1.4612916851566851e-05, + "loss": 0.4861, + "step": 2070 + }, + { + "epoch": 1.38, + "learning_rate": 1.4608215743207153e-05, + "loss": 0.4787, + "step": 2071 + }, + { + "epoch": 1.38, + "learning_rate": 1.4603513341476354e-05, + "loss": 0.5375, + "step": 2072 + }, + { + "epoch": 1.38, + "learning_rate": 1.4598809647694266e-05, + "loss": 0.509, + "step": 2073 + }, + { + "epoch": 1.38, + "learning_rate": 1.4594104663181052e-05, + "loss": 0.4946, + "step": 2074 + }, + { + "epoch": 1.38, + "learning_rate": 1.4589398389257246e-05, + "loss": 0.5032, + "step": 2075 + }, + { + "epoch": 1.38, + "learning_rate": 1.458469082724374e-05, + "loss": 0.4754, + "step": 2076 + }, + { + "epoch": 1.38, + "learning_rate": 1.4579981978461792e-05, + "loss": 0.4844, + "step": 2077 + }, + { + "epoch": 1.38, + "learning_rate": 1.457527184423301e-05, + "loss": 0.4729, + "step": 2078 + }, + { + "epoch": 1.38, + "learning_rate": 1.4570560425879377e-05, + "loss": 0.4808, + "step": 2079 + }, + { + "epoch": 1.38, + "learning_rate": 1.4565847724723225e-05, + "loss": 0.4565, + "step": 2080 + }, + { + "epoch": 1.38, + "learning_rate": 1.4561133742087253e-05, + "loss": 0.4898, + "step": 2081 + }, + { + "epoch": 1.38, + "learning_rate": 1.4556418479294514e-05, + "loss": 0.5159, + "step": 2082 + }, + { + "epoch": 1.38, + "learning_rate": 1.455170193766843e-05, + "loss": 0.5023, + "step": 2083 + }, + { + "epoch": 1.39, + "learning_rate": 1.454698411853277e-05, + "loss": 0.5002, + "step": 2084 + }, + { + "epoch": 1.39, + "learning_rate": 1.454226502321167e-05, + "loss": 0.4662, + "step": 2085 + }, + { + "epoch": 1.39, + "learning_rate": 1.453754465302962e-05, + "loss": 0.4774, + "step": 2086 + }, + { + "epoch": 1.39, + "learning_rate": 1.453282300931147e-05, + "loss": 0.5091, + "step": 2087 + }, + { + "epoch": 1.39, + "learning_rate": 1.4528100093382422e-05, + "loss": 0.4902, + "step": 2088 + }, + { + "epoch": 1.39, + "learning_rate": 1.4523375906568048e-05, + "loss": 0.4714, + "step": 2089 + }, + { + "epoch": 1.39, + "learning_rate": 1.4518650450194261e-05, + "loss": 0.4562, + "step": 2090 + }, + { + "epoch": 1.39, + "learning_rate": 1.451392372558734e-05, + "loss": 0.5023, + "step": 2091 + }, + { + "epoch": 1.39, + "learning_rate": 1.4509195734073917e-05, + "loss": 0.5238, + "step": 2092 + }, + { + "epoch": 1.39, + "learning_rate": 1.4504466476980983e-05, + "loss": 0.4943, + "step": 2093 + }, + { + "epoch": 1.39, + "learning_rate": 1.4499735955635882e-05, + "loss": 0.5044, + "step": 2094 + }, + { + "epoch": 1.39, + "learning_rate": 1.4495004171366302e-05, + "loss": 0.4935, + "step": 2095 + }, + { + "epoch": 1.39, + "learning_rate": 1.4490271125500306e-05, + "loss": 0.5045, + "step": 2096 + }, + { + "epoch": 1.39, + "learning_rate": 1.4485536819366299e-05, + "loss": 0.4973, + "step": 2097 + }, + { + "epoch": 1.39, + "learning_rate": 1.448080125429304e-05, + "loss": 0.5382, + "step": 2098 + }, + { + "epoch": 1.4, + "learning_rate": 1.4476064431609641e-05, + "loss": 0.4492, + "step": 2099 + }, + { + "epoch": 1.4, + "learning_rate": 1.4471326352645573e-05, + "loss": 0.4704, + "step": 2100 + }, + { + "epoch": 1.4, + "learning_rate": 1.4466587018730654e-05, + "loss": 0.4742, + "step": 2101 + }, + { + "epoch": 1.4, + "learning_rate": 1.4461846431195053e-05, + "loss": 0.4793, + "step": 2102 + }, + { + "epoch": 1.4, + "learning_rate": 1.4457104591369293e-05, + "loss": 0.4901, + "step": 2103 + }, + { + "epoch": 1.4, + "learning_rate": 1.4452361500584252e-05, + "loss": 0.4717, + "step": 2104 + }, + { + "epoch": 1.4, + "learning_rate": 1.4447617160171154e-05, + "loss": 0.486, + "step": 2105 + }, + { + "epoch": 1.4, + "learning_rate": 1.444287157146158e-05, + "loss": 0.4732, + "step": 2106 + }, + { + "epoch": 1.4, + "learning_rate": 1.443812473578745e-05, + "loss": 0.504, + "step": 2107 + }, + { + "epoch": 1.4, + "learning_rate": 1.4433376654481046e-05, + "loss": 0.4926, + "step": 2108 + }, + { + "epoch": 1.4, + "learning_rate": 1.442862732887499e-05, + "loss": 0.4825, + "step": 2109 + }, + { + "epoch": 1.4, + "learning_rate": 1.4423876760302266e-05, + "loss": 0.5016, + "step": 2110 + }, + { + "epoch": 1.4, + "learning_rate": 1.4419124950096192e-05, + "loss": 0.4784, + "step": 2111 + }, + { + "epoch": 1.4, + "learning_rate": 1.4414371899590445e-05, + "loss": 0.5022, + "step": 2112 + }, + { + "epoch": 1.4, + "learning_rate": 1.4409617610119041e-05, + "loss": 0.539, + "step": 2113 + }, + { + "epoch": 1.41, + "learning_rate": 1.4404862083016355e-05, + "loss": 0.4778, + "step": 2114 + }, + { + "epoch": 1.41, + "learning_rate": 1.4400105319617102e-05, + "loss": 0.4949, + "step": 2115 + }, + { + "epoch": 1.41, + "learning_rate": 1.439534732125634e-05, + "loss": 0.5068, + "step": 2116 + }, + { + "epoch": 1.41, + "learning_rate": 1.4390588089269488e-05, + "loss": 0.5148, + "step": 2117 + }, + { + "epoch": 1.41, + "learning_rate": 1.4385827624992297e-05, + "loss": 0.4851, + "step": 2118 + }, + { + "epoch": 1.41, + "learning_rate": 1.4381065929760867e-05, + "loss": 0.4907, + "step": 2119 + }, + { + "epoch": 1.41, + "learning_rate": 1.4376303004911654e-05, + "loss": 0.4712, + "step": 2120 + }, + { + "epoch": 1.41, + "learning_rate": 1.437153885178144e-05, + "loss": 0.4832, + "step": 2121 + }, + { + "epoch": 1.41, + "learning_rate": 1.4366773471707368e-05, + "loss": 0.4634, + "step": 2122 + }, + { + "epoch": 1.41, + "learning_rate": 1.4362006866026921e-05, + "loss": 0.4701, + "step": 2123 + }, + { + "epoch": 1.41, + "learning_rate": 1.4357239036077924e-05, + "loss": 0.4728, + "step": 2124 + }, + { + "epoch": 1.41, + "learning_rate": 1.4352469983198542e-05, + "loss": 0.4946, + "step": 2125 + }, + { + "epoch": 1.41, + "learning_rate": 1.4347699708727299e-05, + "loss": 0.4711, + "step": 2126 + }, + { + "epoch": 1.41, + "learning_rate": 1.4342928214003038e-05, + "loss": 0.4568, + "step": 2127 + }, + { + "epoch": 1.41, + "learning_rate": 1.433815550036496e-05, + "loss": 0.4914, + "step": 2128 + }, + { + "epoch": 1.42, + "learning_rate": 1.4333381569152612e-05, + "loss": 0.4917, + "step": 2129 + }, + { + "epoch": 1.42, + "learning_rate": 1.4328606421705868e-05, + "loss": 0.4972, + "step": 2130 + }, + { + "epoch": 1.42, + "learning_rate": 1.4323830059364953e-05, + "loss": 0.4808, + "step": 2131 + }, + { + "epoch": 1.42, + "learning_rate": 1.4319052483470437e-05, + "loss": 0.495, + "step": 2132 + }, + { + "epoch": 1.42, + "learning_rate": 1.4314273695363216e-05, + "loss": 0.4679, + "step": 2133 + }, + { + "epoch": 1.42, + "learning_rate": 1.4309493696384543e-05, + "loss": 0.5073, + "step": 2134 + }, + { + "epoch": 1.42, + "learning_rate": 1.4304712487875999e-05, + "loss": 0.5183, + "step": 2135 + }, + { + "epoch": 1.42, + "learning_rate": 1.4299930071179505e-05, + "loss": 0.5257, + "step": 2136 + }, + { + "epoch": 1.42, + "learning_rate": 1.4295146447637325e-05, + "loss": 0.4997, + "step": 2137 + }, + { + "epoch": 1.42, + "learning_rate": 1.4290361618592071e-05, + "loss": 0.4862, + "step": 2138 + }, + { + "epoch": 1.42, + "learning_rate": 1.4285575585386673e-05, + "loss": 0.4498, + "step": 2139 + }, + { + "epoch": 1.42, + "learning_rate": 1.4280788349364414e-05, + "loss": 0.5013, + "step": 2140 + }, + { + "epoch": 1.42, + "learning_rate": 1.4275999911868912e-05, + "loss": 0.4696, + "step": 2141 + }, + { + "epoch": 1.42, + "learning_rate": 1.4271210274244114e-05, + "loss": 0.5032, + "step": 2142 + }, + { + "epoch": 1.43, + "learning_rate": 1.4266419437834312e-05, + "loss": 0.4575, + "step": 2143 + }, + { + "epoch": 1.43, + "learning_rate": 1.4261627403984136e-05, + "loss": 0.5013, + "step": 2144 + }, + { + "epoch": 1.43, + "learning_rate": 1.4256834174038545e-05, + "loss": 0.4897, + "step": 2145 + }, + { + "epoch": 1.43, + "learning_rate": 1.4252039749342844e-05, + "loss": 0.4883, + "step": 2146 + }, + { + "epoch": 1.43, + "learning_rate": 1.4247244131242656e-05, + "loss": 0.5088, + "step": 2147 + }, + { + "epoch": 1.43, + "learning_rate": 1.4242447321083959e-05, + "loss": 0.4663, + "step": 2148 + }, + { + "epoch": 1.43, + "learning_rate": 1.4237649320213053e-05, + "loss": 0.4839, + "step": 2149 + }, + { + "epoch": 1.43, + "learning_rate": 1.4232850129976573e-05, + "loss": 0.5, + "step": 2150 + }, + { + "epoch": 1.43, + "learning_rate": 1.422804975172149e-05, + "loss": 0.477, + "step": 2151 + }, + { + "epoch": 1.43, + "learning_rate": 1.4223248186795116e-05, + "loss": 0.4794, + "step": 2152 + }, + { + "epoch": 1.43, + "learning_rate": 1.4218445436545081e-05, + "loss": 0.4905, + "step": 2153 + }, + { + "epoch": 1.43, + "learning_rate": 1.4213641502319358e-05, + "loss": 0.4793, + "step": 2154 + }, + { + "epoch": 1.43, + "learning_rate": 1.420883638546625e-05, + "loss": 0.459, + "step": 2155 + }, + { + "epoch": 1.43, + "learning_rate": 1.4204030087334391e-05, + "loss": 0.4852, + "step": 2156 + }, + { + "epoch": 1.43, + "learning_rate": 1.4199222609272747e-05, + "loss": 0.4884, + "step": 2157 + }, + { + "epoch": 1.44, + "learning_rate": 1.4194413952630615e-05, + "loss": 0.5076, + "step": 2158 + }, + { + "epoch": 1.44, + "learning_rate": 1.4189604118757626e-05, + "loss": 0.4751, + "step": 2159 + }, + { + "epoch": 1.44, + "learning_rate": 1.4184793109003734e-05, + "loss": 0.5153, + "step": 2160 + }, + { + "epoch": 1.44, + "learning_rate": 1.4179980924719231e-05, + "loss": 0.5292, + "step": 2161 + }, + { + "epoch": 1.44, + "learning_rate": 1.4175167567254735e-05, + "loss": 0.5047, + "step": 2162 + }, + { + "epoch": 1.44, + "learning_rate": 1.4170353037961193e-05, + "loss": 0.53, + "step": 2163 + }, + { + "epoch": 1.44, + "learning_rate": 1.416553733818988e-05, + "loss": 0.4982, + "step": 2164 + }, + { + "epoch": 1.44, + "learning_rate": 1.4160720469292402e-05, + "loss": 0.4829, + "step": 2165 + }, + { + "epoch": 1.44, + "learning_rate": 1.4155902432620692e-05, + "loss": 0.4918, + "step": 2166 + }, + { + "epoch": 1.44, + "learning_rate": 1.415108322952701e-05, + "loss": 0.4779, + "step": 2167 + }, + { + "epoch": 1.44, + "learning_rate": 1.4146262861363945e-05, + "loss": 0.5003, + "step": 2168 + }, + { + "epoch": 1.44, + "learning_rate": 1.4141441329484414e-05, + "loss": 0.5143, + "step": 2169 + }, + { + "epoch": 1.44, + "learning_rate": 1.4136618635241655e-05, + "loss": 0.5223, + "step": 2170 + }, + { + "epoch": 1.44, + "learning_rate": 1.4131794779989233e-05, + "loss": 0.4915, + "step": 2171 + }, + { + "epoch": 1.44, + "learning_rate": 1.4126969765081055e-05, + "loss": 0.4968, + "step": 2172 + }, + { + "epoch": 1.45, + "learning_rate": 1.4122143591871327e-05, + "loss": 0.5001, + "step": 2173 + }, + { + "epoch": 1.45, + "learning_rate": 1.41173162617146e-05, + "loss": 0.4703, + "step": 2174 + }, + { + "epoch": 1.45, + "learning_rate": 1.4112487775965741e-05, + "loss": 0.5016, + "step": 2175 + }, + { + "epoch": 1.45, + "learning_rate": 1.4107658135979944e-05, + "loss": 0.4968, + "step": 2176 + }, + { + "epoch": 1.45, + "learning_rate": 1.4102827343112725e-05, + "loss": 0.507, + "step": 2177 + }, + { + "epoch": 1.45, + "learning_rate": 1.4097995398719929e-05, + "loss": 0.4762, + "step": 2178 + }, + { + "epoch": 1.45, + "learning_rate": 1.4093162304157715e-05, + "loss": 0.4871, + "step": 2179 + }, + { + "epoch": 1.45, + "learning_rate": 1.4088328060782573e-05, + "loss": 0.5035, + "step": 2180 + }, + { + "epoch": 1.45, + "learning_rate": 1.4083492669951312e-05, + "loss": 0.5147, + "step": 2181 + }, + { + "epoch": 1.45, + "learning_rate": 1.4078656133021065e-05, + "loss": 0.4859, + "step": 2182 + }, + { + "epoch": 1.45, + "learning_rate": 1.4073818451349283e-05, + "loss": 0.5278, + "step": 2183 + }, + { + "epoch": 1.45, + "learning_rate": 1.406897962629374e-05, + "loss": 0.5022, + "step": 2184 + }, + { + "epoch": 1.45, + "learning_rate": 1.4064139659212534e-05, + "loss": 0.4675, + "step": 2185 + }, + { + "epoch": 1.45, + "learning_rate": 1.4059298551464077e-05, + "loss": 0.5268, + "step": 2186 + }, + { + "epoch": 1.45, + "learning_rate": 1.4054456304407111e-05, + "loss": 0.5141, + "step": 2187 + }, + { + "epoch": 1.46, + "learning_rate": 1.4049612919400687e-05, + "loss": 0.5162, + "step": 2188 + }, + { + "epoch": 1.46, + "learning_rate": 1.4044768397804181e-05, + "loss": 0.494, + "step": 2189 + }, + { + "epoch": 1.46, + "learning_rate": 1.403992274097729e-05, + "loss": 0.492, + "step": 2190 + }, + { + "epoch": 1.46, + "learning_rate": 1.4035075950280021e-05, + "loss": 0.4818, + "step": 2191 + }, + { + "epoch": 1.46, + "learning_rate": 1.403022802707271e-05, + "loss": 0.4826, + "step": 2192 + }, + { + "epoch": 1.46, + "learning_rate": 1.4025378972716004e-05, + "loss": 0.5209, + "step": 2193 + }, + { + "epoch": 1.46, + "learning_rate": 1.4020528788570871e-05, + "loss": 0.4911, + "step": 2194 + }, + { + "epoch": 1.46, + "learning_rate": 1.4015677475998595e-05, + "loss": 0.4891, + "step": 2195 + }, + { + "epoch": 1.46, + "learning_rate": 1.4010825036360771e-05, + "loss": 0.5338, + "step": 2196 + }, + { + "epoch": 1.46, + "learning_rate": 1.400597147101932e-05, + "loss": 0.4825, + "step": 2197 + }, + { + "epoch": 1.46, + "learning_rate": 1.4001116781336475e-05, + "loss": 0.471, + "step": 2198 + }, + { + "epoch": 1.46, + "learning_rate": 1.399626096867478e-05, + "loss": 0.4719, + "step": 2199 + }, + { + "epoch": 1.46, + "learning_rate": 1.3991404034397102e-05, + "loss": 0.4844, + "step": 2200 + }, + { + "epoch": 1.46, + "learning_rate": 1.398654597986662e-05, + "loss": 0.4912, + "step": 2201 + }, + { + "epoch": 1.46, + "learning_rate": 1.3981686806446822e-05, + "loss": 0.509, + "step": 2202 + }, + { + "epoch": 1.47, + "learning_rate": 1.3976826515501518e-05, + "loss": 0.5028, + "step": 2203 + }, + { + "epoch": 1.47, + "learning_rate": 1.3971965108394827e-05, + "loss": 0.4754, + "step": 2204 + }, + { + "epoch": 1.47, + "learning_rate": 1.3967102586491179e-05, + "loss": 0.5052, + "step": 2205 + }, + { + "epoch": 1.47, + "learning_rate": 1.3962238951155325e-05, + "loss": 0.4803, + "step": 2206 + }, + { + "epoch": 1.47, + "learning_rate": 1.3957374203752325e-05, + "loss": 0.5064, + "step": 2207 + }, + { + "epoch": 1.47, + "learning_rate": 1.3952508345647546e-05, + "loss": 0.5004, + "step": 2208 + }, + { + "epoch": 1.47, + "learning_rate": 1.3947641378206671e-05, + "loss": 0.5101, + "step": 2209 + }, + { + "epoch": 1.47, + "learning_rate": 1.3942773302795697e-05, + "loss": 0.4889, + "step": 2210 + }, + { + "epoch": 1.47, + "learning_rate": 1.3937904120780925e-05, + "loss": 0.509, + "step": 2211 + }, + { + "epoch": 1.47, + "learning_rate": 1.3933033833528971e-05, + "loss": 0.5285, + "step": 2212 + }, + { + "epoch": 1.47, + "learning_rate": 1.3928162442406765e-05, + "loss": 0.5387, + "step": 2213 + }, + { + "epoch": 1.47, + "learning_rate": 1.3923289948781538e-05, + "loss": 0.5058, + "step": 2214 + }, + { + "epoch": 1.47, + "learning_rate": 1.3918416354020836e-05, + "loss": 0.4837, + "step": 2215 + }, + { + "epoch": 1.47, + "learning_rate": 1.3913541659492516e-05, + "loss": 0.4815, + "step": 2216 + }, + { + "epoch": 1.47, + "learning_rate": 1.3908665866564736e-05, + "loss": 0.4681, + "step": 2217 + }, + { + "epoch": 1.48, + "learning_rate": 1.3903788976605972e-05, + "loss": 0.506, + "step": 2218 + }, + { + "epoch": 1.48, + "learning_rate": 1.3898910990985e-05, + "loss": 0.5058, + "step": 2219 + }, + { + "epoch": 1.48, + "learning_rate": 1.3894031911070904e-05, + "loss": 0.4847, + "step": 2220 + }, + { + "epoch": 1.48, + "learning_rate": 1.3889151738233087e-05, + "loss": 0.4951, + "step": 2221 + }, + { + "epoch": 1.48, + "learning_rate": 1.3884270473841235e-05, + "loss": 0.4927, + "step": 2222 + }, + { + "epoch": 1.48, + "learning_rate": 1.3879388119265366e-05, + "loss": 0.4699, + "step": 2223 + }, + { + "epoch": 1.48, + "learning_rate": 1.3874504675875791e-05, + "loss": 0.5349, + "step": 2224 + }, + { + "epoch": 1.48, + "learning_rate": 1.3869620145043123e-05, + "loss": 0.4957, + "step": 2225 + }, + { + "epoch": 1.48, + "learning_rate": 1.3864734528138288e-05, + "loss": 0.4657, + "step": 2226 + }, + { + "epoch": 1.48, + "learning_rate": 1.3859847826532518e-05, + "loss": 0.511, + "step": 2227 + }, + { + "epoch": 1.48, + "learning_rate": 1.385496004159734e-05, + "loss": 0.5271, + "step": 2228 + }, + { + "epoch": 1.48, + "learning_rate": 1.3850071174704596e-05, + "loss": 0.4646, + "step": 2229 + }, + { + "epoch": 1.48, + "learning_rate": 1.3845181227226423e-05, + "loss": 0.4827, + "step": 2230 + }, + { + "epoch": 1.48, + "learning_rate": 1.3840290200535265e-05, + "loss": 0.4871, + "step": 2231 + }, + { + "epoch": 1.48, + "learning_rate": 1.3835398096003869e-05, + "loss": 0.4928, + "step": 2232 + }, + { + "epoch": 1.49, + "learning_rate": 1.3830504915005285e-05, + "loss": 0.4904, + "step": 2233 + }, + { + "epoch": 1.49, + "learning_rate": 1.3825610658912863e-05, + "loss": 0.4594, + "step": 2234 + }, + { + "epoch": 1.49, + "learning_rate": 1.3820715329100256e-05, + "loss": 0.4881, + "step": 2235 + }, + { + "epoch": 1.49, + "learning_rate": 1.3815818926941422e-05, + "loss": 0.4578, + "step": 2236 + }, + { + "epoch": 1.49, + "learning_rate": 1.3810921453810611e-05, + "loss": 0.4822, + "step": 2237 + }, + { + "epoch": 1.49, + "learning_rate": 1.380602291108238e-05, + "loss": 0.5512, + "step": 2238 + }, + { + "epoch": 1.49, + "learning_rate": 1.3801123300131587e-05, + "loss": 0.5119, + "step": 2239 + }, + { + "epoch": 1.49, + "learning_rate": 1.3796222622333389e-05, + "loss": 0.4778, + "step": 2240 + }, + { + "epoch": 1.49, + "learning_rate": 1.379132087906324e-05, + "loss": 0.4658, + "step": 2241 + }, + { + "epoch": 1.49, + "learning_rate": 1.378641807169689e-05, + "loss": 0.4784, + "step": 2242 + }, + { + "epoch": 1.49, + "learning_rate": 1.3781514201610398e-05, + "loss": 0.4842, + "step": 2243 + }, + { + "epoch": 1.49, + "learning_rate": 1.3776609270180118e-05, + "loss": 0.4689, + "step": 2244 + }, + { + "epoch": 1.49, + "learning_rate": 1.3771703278782689e-05, + "loss": 0.4997, + "step": 2245 + }, + { + "epoch": 1.49, + "learning_rate": 1.3766796228795064e-05, + "loss": 0.5217, + "step": 2246 + }, + { + "epoch": 1.5, + "learning_rate": 1.3761888121594487e-05, + "loss": 0.4725, + "step": 2247 + }, + { + "epoch": 1.5, + "learning_rate": 1.3756978958558496e-05, + "loss": 0.4786, + "step": 2248 + }, + { + "epoch": 1.5, + "learning_rate": 1.375206874106493e-05, + "loss": 0.4814, + "step": 2249 + }, + { + "epoch": 1.5, + "learning_rate": 1.3747157470491923e-05, + "loss": 0.4969, + "step": 2250 + }, + { + "epoch": 1.5, + "learning_rate": 1.3742245148217901e-05, + "loss": 0.4791, + "step": 2251 + }, + { + "epoch": 1.5, + "learning_rate": 1.3737331775621588e-05, + "loss": 0.4764, + "step": 2252 + }, + { + "epoch": 1.5, + "learning_rate": 1.3732417354082002e-05, + "loss": 0.4959, + "step": 2253 + }, + { + "epoch": 1.5, + "learning_rate": 1.3727501884978456e-05, + "loss": 0.4977, + "step": 2254 + }, + { + "epoch": 1.5, + "learning_rate": 1.372258536969056e-05, + "loss": 0.5343, + "step": 2255 + }, + { + "epoch": 1.5, + "learning_rate": 1.371766780959821e-05, + "loss": 0.4699, + "step": 2256 + }, + { + "epoch": 1.5, + "learning_rate": 1.37127492060816e-05, + "loss": 0.503, + "step": 2257 + }, + { + "epoch": 1.5, + "learning_rate": 1.3707829560521219e-05, + "loss": 0.4918, + "step": 2258 + }, + { + "epoch": 1.5, + "learning_rate": 1.3702908874297846e-05, + "loss": 0.4999, + "step": 2259 + }, + { + "epoch": 1.5, + "learning_rate": 1.3697987148792546e-05, + "loss": 0.5087, + "step": 2260 + }, + { + "epoch": 1.5, + "learning_rate": 1.369306438538669e-05, + "loss": 0.4966, + "step": 2261 + }, + { + "epoch": 1.51, + "learning_rate": 1.368814058546193e-05, + "loss": 0.4839, + "step": 2262 + }, + { + "epoch": 1.51, + "learning_rate": 1.3683215750400207e-05, + "loss": 0.4963, + "step": 2263 + }, + { + "epoch": 1.51, + "learning_rate": 1.3678289881583759e-05, + "loss": 0.4981, + "step": 2264 + }, + { + "epoch": 1.51, + "learning_rate": 1.3673362980395115e-05, + "loss": 0.4385, + "step": 2265 + }, + { + "epoch": 1.51, + "learning_rate": 1.3668435048217083e-05, + "loss": 0.5018, + "step": 2266 + }, + { + "epoch": 1.51, + "learning_rate": 1.3663506086432774e-05, + "loss": 0.4818, + "step": 2267 + }, + { + "epoch": 1.51, + "learning_rate": 1.3658576096425578e-05, + "loss": 0.4613, + "step": 2268 + }, + { + "epoch": 1.51, + "learning_rate": 1.3653645079579178e-05, + "loss": 0.4918, + "step": 2269 + }, + { + "epoch": 1.51, + "learning_rate": 1.364871303727755e-05, + "loss": 0.5107, + "step": 2270 + }, + { + "epoch": 1.51, + "learning_rate": 1.3643779970904942e-05, + "loss": 0.5079, + "step": 2271 + }, + { + "epoch": 1.51, + "learning_rate": 1.3638845881845909e-05, + "loss": 0.5121, + "step": 2272 + }, + { + "epoch": 1.51, + "learning_rate": 1.3633910771485276e-05, + "loss": 0.4599, + "step": 2273 + }, + { + "epoch": 1.51, + "learning_rate": 1.3628974641208168e-05, + "loss": 0.488, + "step": 2274 + }, + { + "epoch": 1.51, + "learning_rate": 1.3624037492399991e-05, + "loss": 0.5232, + "step": 2275 + }, + { + "epoch": 1.51, + "learning_rate": 1.3619099326446434e-05, + "loss": 0.5337, + "step": 2276 + }, + { + "epoch": 1.52, + "learning_rate": 1.3614160144733475e-05, + "loss": 0.4987, + "step": 2277 + }, + { + "epoch": 1.52, + "learning_rate": 1.3609219948647377e-05, + "loss": 0.4844, + "step": 2278 + }, + { + "epoch": 1.52, + "learning_rate": 1.3604278739574683e-05, + "loss": 0.5125, + "step": 2279 + }, + { + "epoch": 1.52, + "learning_rate": 1.3599336518902228e-05, + "loss": 0.5085, + "step": 2280 + }, + { + "epoch": 1.52, + "learning_rate": 1.3594393288017122e-05, + "loss": 0.4843, + "step": 2281 + }, + { + "epoch": 1.52, + "learning_rate": 1.3589449048306773e-05, + "loss": 0.4926, + "step": 2282 + }, + { + "epoch": 1.52, + "learning_rate": 1.3584503801158854e-05, + "loss": 0.491, + "step": 2283 + }, + { + "epoch": 1.52, + "learning_rate": 1.3579557547961335e-05, + "loss": 0.4813, + "step": 2284 + }, + { + "epoch": 1.52, + "learning_rate": 1.3574610290102462e-05, + "loss": 0.498, + "step": 2285 + }, + { + "epoch": 1.52, + "learning_rate": 1.3569662028970759e-05, + "loss": 0.4855, + "step": 2286 + }, + { + "epoch": 1.52, + "learning_rate": 1.3564712765955038e-05, + "loss": 0.4984, + "step": 2287 + }, + { + "epoch": 1.52, + "learning_rate": 1.3559762502444396e-05, + "loss": 0.5005, + "step": 2288 + }, + { + "epoch": 1.52, + "learning_rate": 1.3554811239828198e-05, + "loss": 0.5207, + "step": 2289 + }, + { + "epoch": 1.52, + "learning_rate": 1.3549858979496104e-05, + "loss": 0.4841, + "step": 2290 + }, + { + "epoch": 1.52, + "learning_rate": 1.3544905722838042e-05, + "loss": 0.5085, + "step": 2291 + }, + { + "epoch": 1.53, + "learning_rate": 1.3539951471244224e-05, + "loss": 0.4746, + "step": 2292 + }, + { + "epoch": 1.53, + "learning_rate": 1.3534996226105144e-05, + "loss": 0.4904, + "step": 2293 + }, + { + "epoch": 1.53, + "learning_rate": 1.3530039988811573e-05, + "loss": 0.5245, + "step": 2294 + }, + { + "epoch": 1.53, + "learning_rate": 1.3525082760754557e-05, + "loss": 0.4997, + "step": 2295 + }, + { + "epoch": 1.53, + "learning_rate": 1.352012454332543e-05, + "loss": 0.4993, + "step": 2296 + }, + { + "epoch": 1.53, + "learning_rate": 1.3515165337915788e-05, + "loss": 0.5101, + "step": 2297 + }, + { + "epoch": 1.53, + "learning_rate": 1.3510205145917517e-05, + "loss": 0.4784, + "step": 2298 + }, + { + "epoch": 1.53, + "learning_rate": 1.350524396872278e-05, + "loss": 0.4903, + "step": 2299 + }, + { + "epoch": 1.53, + "learning_rate": 1.3500281807724003e-05, + "loss": 0.4962, + "step": 2300 + }, + { + "epoch": 1.53, + "learning_rate": 1.3495318664313904e-05, + "loss": 0.475, + "step": 2301 + }, + { + "epoch": 1.53, + "learning_rate": 1.3490354539885473e-05, + "loss": 0.4922, + "step": 2302 + }, + { + "epoch": 1.53, + "learning_rate": 1.3485389435831966e-05, + "loss": 0.5075, + "step": 2303 + }, + { + "epoch": 1.53, + "learning_rate": 1.3480423353546926e-05, + "loss": 0.5166, + "step": 2304 + }, + { + "epoch": 1.53, + "learning_rate": 1.3475456294424163e-05, + "loss": 0.4945, + "step": 2305 + }, + { + "epoch": 1.53, + "learning_rate": 1.3470488259857763e-05, + "loss": 0.523, + "step": 2306 + }, + { + "epoch": 1.54, + "learning_rate": 1.3465519251242085e-05, + "loss": 0.5399, + "step": 2307 + }, + { + "epoch": 1.54, + "learning_rate": 1.3460549269971765e-05, + "loss": 0.4964, + "step": 2308 + }, + { + "epoch": 1.54, + "learning_rate": 1.345557831744171e-05, + "loss": 0.5056, + "step": 2309 + }, + { + "epoch": 1.54, + "learning_rate": 1.3450606395047094e-05, + "loss": 0.4744, + "step": 2310 + }, + { + "epoch": 1.54, + "learning_rate": 1.3445633504183376e-05, + "loss": 0.5038, + "step": 2311 + }, + { + "epoch": 1.54, + "learning_rate": 1.3440659646246275e-05, + "loss": 0.5182, + "step": 2312 + }, + { + "epoch": 1.54, + "learning_rate": 1.3435684822631783e-05, + "loss": 0.4954, + "step": 2313 + }, + { + "epoch": 1.54, + "learning_rate": 1.3430709034736169e-05, + "loss": 0.5193, + "step": 2314 + }, + { + "epoch": 1.54, + "learning_rate": 1.3425732283955968e-05, + "loss": 0.5161, + "step": 2315 + }, + { + "epoch": 1.54, + "learning_rate": 1.3420754571687989e-05, + "loss": 0.4841, + "step": 2316 + }, + { + "epoch": 1.54, + "learning_rate": 1.3415775899329306e-05, + "loss": 0.5132, + "step": 2317 + }, + { + "epoch": 1.54, + "learning_rate": 1.3410796268277264e-05, + "loss": 0.4653, + "step": 2318 + }, + { + "epoch": 1.54, + "learning_rate": 1.340581567992948e-05, + "loss": 0.5226, + "step": 2319 + }, + { + "epoch": 1.54, + "learning_rate": 1.3400834135683836e-05, + "loss": 0.5066, + "step": 2320 + }, + { + "epoch": 1.54, + "learning_rate": 1.3395851636938484e-05, + "loss": 0.5038, + "step": 2321 + }, + { + "epoch": 1.55, + "learning_rate": 1.3390868185091844e-05, + "loss": 0.4913, + "step": 2322 + }, + { + "epoch": 1.55, + "learning_rate": 1.3385883781542601e-05, + "loss": 0.5052, + "step": 2323 + }, + { + "epoch": 1.55, + "learning_rate": 1.3380898427689717e-05, + "loss": 0.4717, + "step": 2324 + }, + { + "epoch": 1.55, + "learning_rate": 1.3375912124932406e-05, + "loss": 0.4844, + "step": 2325 + }, + { + "epoch": 1.55, + "learning_rate": 1.3370924874670155e-05, + "loss": 0.5088, + "step": 2326 + }, + { + "epoch": 1.55, + "learning_rate": 1.3365936678302722e-05, + "loss": 0.5426, + "step": 2327 + }, + { + "epoch": 1.55, + "learning_rate": 1.3360947537230123e-05, + "loss": 0.506, + "step": 2328 + }, + { + "epoch": 1.55, + "learning_rate": 1.335595745285264e-05, + "loss": 0.4665, + "step": 2329 + }, + { + "epoch": 1.55, + "learning_rate": 1.3350966426570825e-05, + "loss": 0.4601, + "step": 2330 + }, + { + "epoch": 1.55, + "learning_rate": 1.3345974459785494e-05, + "loss": 0.4649, + "step": 2331 + }, + { + "epoch": 1.55, + "learning_rate": 1.3340981553897719e-05, + "loss": 0.4749, + "step": 2332 + }, + { + "epoch": 1.55, + "learning_rate": 1.333598771030884e-05, + "loss": 0.4608, + "step": 2333 + }, + { + "epoch": 1.55, + "learning_rate": 1.3330992930420466e-05, + "loss": 0.5071, + "step": 2334 + }, + { + "epoch": 1.55, + "learning_rate": 1.3325997215634457e-05, + "loss": 0.4862, + "step": 2335 + }, + { + "epoch": 1.56, + "learning_rate": 1.3321000567352944e-05, + "loss": 0.4901, + "step": 2336 + }, + { + "epoch": 1.56, + "learning_rate": 1.3316002986978326e-05, + "loss": 0.5135, + "step": 2337 + }, + { + "epoch": 1.56, + "learning_rate": 1.3311004475913247e-05, + "loss": 0.5051, + "step": 2338 + }, + { + "epoch": 1.56, + "learning_rate": 1.3306005035560623e-05, + "loss": 0.4989, + "step": 2339 + }, + { + "epoch": 1.56, + "learning_rate": 1.330100466732363e-05, + "loss": 0.4646, + "step": 2340 + }, + { + "epoch": 1.56, + "learning_rate": 1.32960033726057e-05, + "loss": 0.4715, + "step": 2341 + }, + { + "epoch": 1.56, + "learning_rate": 1.329100115281053e-05, + "loss": 0.4844, + "step": 2342 + }, + { + "epoch": 1.56, + "learning_rate": 1.3285998009342076e-05, + "loss": 0.5051, + "step": 2343 + }, + { + "epoch": 1.56, + "learning_rate": 1.328099394360455e-05, + "loss": 0.5271, + "step": 2344 + }, + { + "epoch": 1.56, + "learning_rate": 1.3275988957002429e-05, + "loss": 0.4777, + "step": 2345 + }, + { + "epoch": 1.56, + "learning_rate": 1.3270983050940435e-05, + "loss": 0.4982, + "step": 2346 + }, + { + "epoch": 1.56, + "learning_rate": 1.3265976226823569e-05, + "loss": 0.4717, + "step": 2347 + }, + { + "epoch": 1.56, + "learning_rate": 1.3260968486057068e-05, + "loss": 0.4823, + "step": 2348 + }, + { + "epoch": 1.56, + "learning_rate": 1.3255959830046446e-05, + "loss": 0.496, + "step": 2349 + }, + { + "epoch": 1.56, + "learning_rate": 1.3250950260197455e-05, + "loss": 0.4878, + "step": 2350 + }, + { + "epoch": 1.57, + "learning_rate": 1.324593977791612e-05, + "loss": 0.4919, + "step": 2351 + }, + { + "epoch": 1.57, + "learning_rate": 1.3240928384608712e-05, + "loss": 0.4997, + "step": 2352 + }, + { + "epoch": 1.57, + "learning_rate": 1.3235916081681757e-05, + "loss": 0.4921, + "step": 2353 + }, + { + "epoch": 1.57, + "learning_rate": 1.3230902870542046e-05, + "loss": 0.5137, + "step": 2354 + }, + { + "epoch": 1.57, + "learning_rate": 1.3225888752596614e-05, + "loss": 0.4929, + "step": 2355 + }, + { + "epoch": 1.57, + "learning_rate": 1.3220873729252752e-05, + "loss": 0.4413, + "step": 2356 + }, + { + "epoch": 1.57, + "learning_rate": 1.321585780191802e-05, + "loss": 0.4459, + "step": 2357 + }, + { + "epoch": 1.57, + "learning_rate": 1.3210840972000209e-05, + "loss": 0.4613, + "step": 2358 + }, + { + "epoch": 1.57, + "learning_rate": 1.3205823240907381e-05, + "loss": 0.5138, + "step": 2359 + }, + { + "epoch": 1.57, + "learning_rate": 1.3200804610047842e-05, + "loss": 0.5126, + "step": 2360 + }, + { + "epoch": 1.57, + "learning_rate": 1.319578508083015e-05, + "loss": 0.5372, + "step": 2361 + }, + { + "epoch": 1.57, + "learning_rate": 1.3190764654663122e-05, + "loss": 0.496, + "step": 2362 + }, + { + "epoch": 1.57, + "learning_rate": 1.3185743332955818e-05, + "loss": 0.4629, + "step": 2363 + }, + { + "epoch": 1.57, + "learning_rate": 1.3180721117117562e-05, + "loss": 0.5045, + "step": 2364 + }, + { + "epoch": 1.57, + "learning_rate": 1.3175698008557914e-05, + "loss": 0.4811, + "step": 2365 + }, + { + "epoch": 1.58, + "learning_rate": 1.3170674008686692e-05, + "loss": 0.4964, + "step": 2366 + }, + { + "epoch": 1.58, + "learning_rate": 1.3165649118913968e-05, + "loss": 0.5173, + "step": 2367 + }, + { + "epoch": 1.58, + "learning_rate": 1.3160623340650058e-05, + "loss": 0.4976, + "step": 2368 + }, + { + "epoch": 1.58, + "learning_rate": 1.3155596675305522e-05, + "loss": 0.4787, + "step": 2369 + }, + { + "epoch": 1.58, + "learning_rate": 1.3150569124291186e-05, + "loss": 0.4784, + "step": 2370 + }, + { + "epoch": 1.58, + "learning_rate": 1.314554068901811e-05, + "loss": 0.5282, + "step": 2371 + }, + { + "epoch": 1.58, + "learning_rate": 1.3140511370897607e-05, + "loss": 0.5015, + "step": 2372 + }, + { + "epoch": 1.58, + "learning_rate": 1.3135481171341233e-05, + "loss": 0.4699, + "step": 2373 + }, + { + "epoch": 1.58, + "learning_rate": 1.3130450091760804e-05, + "loss": 0.4843, + "step": 2374 + }, + { + "epoch": 1.58, + "learning_rate": 1.3125418133568369e-05, + "loss": 0.4747, + "step": 2375 + }, + { + "epoch": 1.58, + "learning_rate": 1.312038529817623e-05, + "loss": 0.5123, + "step": 2376 + }, + { + "epoch": 1.58, + "learning_rate": 1.3115351586996937e-05, + "loss": 0.503, + "step": 2377 + }, + { + "epoch": 1.58, + "learning_rate": 1.3110317001443278e-05, + "loss": 0.4729, + "step": 2378 + }, + { + "epoch": 1.58, + "learning_rate": 1.3105281542928296e-05, + "loss": 0.497, + "step": 2379 + }, + { + "epoch": 1.58, + "learning_rate": 1.3100245212865279e-05, + "loss": 0.5098, + "step": 2380 + }, + { + "epoch": 1.59, + "learning_rate": 1.3095208012667747e-05, + "loss": 0.5135, + "step": 2381 + }, + { + "epoch": 1.59, + "learning_rate": 1.3090169943749475e-05, + "loss": 0.4688, + "step": 2382 + }, + { + "epoch": 1.59, + "learning_rate": 1.3085131007524483e-05, + "loss": 0.4836, + "step": 2383 + }, + { + "epoch": 1.59, + "learning_rate": 1.3080091205407027e-05, + "loss": 0.4571, + "step": 2384 + }, + { + "epoch": 1.59, + "learning_rate": 1.3075050538811611e-05, + "loss": 0.4976, + "step": 2385 + }, + { + "epoch": 1.59, + "learning_rate": 1.3070009009152984e-05, + "loss": 0.5032, + "step": 2386 + }, + { + "epoch": 1.59, + "learning_rate": 1.3064966617846126e-05, + "loss": 0.4935, + "step": 2387 + }, + { + "epoch": 1.59, + "learning_rate": 1.3059923366306272e-05, + "loss": 0.5023, + "step": 2388 + }, + { + "epoch": 1.59, + "learning_rate": 1.3054879255948896e-05, + "loss": 0.4684, + "step": 2389 + }, + { + "epoch": 1.59, + "learning_rate": 1.3049834288189702e-05, + "loss": 0.5184, + "step": 2390 + }, + { + "epoch": 1.59, + "learning_rate": 1.3044788464444648e-05, + "loss": 0.4983, + "step": 2391 + }, + { + "epoch": 1.59, + "learning_rate": 1.3039741786129927e-05, + "loss": 0.514, + "step": 2392 + }, + { + "epoch": 1.59, + "learning_rate": 1.3034694254661971e-05, + "loss": 0.4756, + "step": 2393 + }, + { + "epoch": 1.59, + "learning_rate": 1.302964587145745e-05, + "loss": 0.4788, + "step": 2394 + }, + { + "epoch": 1.59, + "learning_rate": 1.3024596637933277e-05, + "loss": 0.4821, + "step": 2395 + }, + { + "epoch": 1.6, + "learning_rate": 1.3019546555506603e-05, + "loss": 0.5358, + "step": 2396 + }, + { + "epoch": 1.6, + "learning_rate": 1.3014495625594815e-05, + "loss": 0.5028, + "step": 2397 + }, + { + "epoch": 1.6, + "learning_rate": 1.3009443849615538e-05, + "loss": 0.48, + "step": 2398 + }, + { + "epoch": 1.6, + "learning_rate": 1.3004391228986637e-05, + "loss": 0.4808, + "step": 2399 + }, + { + "epoch": 1.6, + "learning_rate": 1.2999337765126217e-05, + "loss": 0.4664, + "step": 2400 + }, + { + "epoch": 1.6, + "learning_rate": 1.2994283459452606e-05, + "loss": 0.5013, + "step": 2401 + }, + { + "epoch": 1.6, + "learning_rate": 1.2989228313384383e-05, + "loss": 0.497, + "step": 2402 + }, + { + "epoch": 1.6, + "learning_rate": 1.2984172328340357e-05, + "loss": 0.5056, + "step": 2403 + }, + { + "epoch": 1.6, + "learning_rate": 1.2979115505739573e-05, + "loss": 0.4779, + "step": 2404 + }, + { + "epoch": 1.6, + "learning_rate": 1.2974057847001306e-05, + "loss": 0.4619, + "step": 2405 + }, + { + "epoch": 1.6, + "learning_rate": 1.2968999353545082e-05, + "loss": 0.4889, + "step": 2406 + }, + { + "epoch": 1.6, + "learning_rate": 1.296394002679064e-05, + "loss": 0.5278, + "step": 2407 + }, + { + "epoch": 1.6, + "learning_rate": 1.2958879868157966e-05, + "loss": 0.501, + "step": 2408 + }, + { + "epoch": 1.6, + "learning_rate": 1.2953818879067276e-05, + "loss": 0.481, + "step": 2409 + }, + { + "epoch": 1.6, + "learning_rate": 1.2948757060939019e-05, + "loss": 0.5152, + "step": 2410 + }, + { + "epoch": 1.61, + "learning_rate": 1.2943694415193875e-05, + "loss": 0.5048, + "step": 2411 + }, + { + "epoch": 1.61, + "learning_rate": 1.2938630943252765e-05, + "loss": 0.5087, + "step": 2412 + }, + { + "epoch": 1.61, + "learning_rate": 1.293356664653683e-05, + "loss": 0.5102, + "step": 2413 + }, + { + "epoch": 1.61, + "learning_rate": 1.2928501526467448e-05, + "loss": 0.4895, + "step": 2414 + }, + { + "epoch": 1.61, + "learning_rate": 1.2923435584466228e-05, + "loss": 0.5067, + "step": 2415 + }, + { + "epoch": 1.61, + "learning_rate": 1.291836882195501e-05, + "loss": 0.5439, + "step": 2416 + }, + { + "epoch": 1.61, + "learning_rate": 1.2913301240355861e-05, + "loss": 0.473, + "step": 2417 + }, + { + "epoch": 1.61, + "learning_rate": 1.2908232841091088e-05, + "loss": 0.4874, + "step": 2418 + }, + { + "epoch": 1.61, + "learning_rate": 1.2903163625583213e-05, + "loss": 0.5007, + "step": 2419 + }, + { + "epoch": 1.61, + "learning_rate": 1.2898093595254998e-05, + "loss": 0.5122, + "step": 2420 + }, + { + "epoch": 1.61, + "learning_rate": 1.2893022751529425e-05, + "loss": 0.4922, + "step": 2421 + }, + { + "epoch": 1.61, + "learning_rate": 1.2887951095829713e-05, + "loss": 0.4741, + "step": 2422 + }, + { + "epoch": 1.61, + "learning_rate": 1.288287862957931e-05, + "loss": 0.4794, + "step": 2423 + }, + { + "epoch": 1.61, + "learning_rate": 1.2877805354201876e-05, + "loss": 0.5161, + "step": 2424 + }, + { + "epoch": 1.61, + "learning_rate": 1.2872731271121315e-05, + "loss": 0.4921, + "step": 2425 + }, + { + "epoch": 1.62, + "learning_rate": 1.286765638176175e-05, + "loss": 0.4682, + "step": 2426 + }, + { + "epoch": 1.62, + "learning_rate": 1.2862580687547534e-05, + "loss": 0.5074, + "step": 2427 + }, + { + "epoch": 1.62, + "learning_rate": 1.2857504189903241e-05, + "loss": 0.5238, + "step": 2428 + }, + { + "epoch": 1.62, + "learning_rate": 1.2852426890253676e-05, + "loss": 0.4943, + "step": 2429 + }, + { + "epoch": 1.62, + "learning_rate": 1.2847348790023858e-05, + "loss": 0.4989, + "step": 2430 + }, + { + "epoch": 1.62, + "learning_rate": 1.284226989063905e-05, + "loss": 0.4927, + "step": 2431 + }, + { + "epoch": 1.62, + "learning_rate": 1.283719019352472e-05, + "loss": 0.5004, + "step": 2432 + }, + { + "epoch": 1.62, + "learning_rate": 1.2832109700106572e-05, + "loss": 0.4837, + "step": 2433 + }, + { + "epoch": 1.62, + "learning_rate": 1.2827028411810527e-05, + "loss": 0.4997, + "step": 2434 + }, + { + "epoch": 1.62, + "learning_rate": 1.2821946330062738e-05, + "loss": 0.4895, + "step": 2435 + }, + { + "epoch": 1.62, + "learning_rate": 1.2816863456289565e-05, + "loss": 0.5072, + "step": 2436 + }, + { + "epoch": 1.62, + "learning_rate": 1.2811779791917605e-05, + "loss": 0.4691, + "step": 2437 + }, + { + "epoch": 1.62, + "learning_rate": 1.2806695338373669e-05, + "loss": 0.4917, + "step": 2438 + }, + { + "epoch": 1.62, + "learning_rate": 1.2801610097084796e-05, + "loss": 0.4832, + "step": 2439 + }, + { + "epoch": 1.63, + "learning_rate": 1.2796524069478243e-05, + "loss": 0.5057, + "step": 2440 + }, + { + "epoch": 1.63, + "learning_rate": 1.2791437256981479e-05, + "loss": 0.4779, + "step": 2441 + }, + { + "epoch": 1.63, + "learning_rate": 1.2786349661022205e-05, + "loss": 0.5197, + "step": 2442 + }, + { + "epoch": 1.63, + "learning_rate": 1.2781261283028344e-05, + "loss": 0.4913, + "step": 2443 + }, + { + "epoch": 1.63, + "learning_rate": 1.2776172124428023e-05, + "loss": 0.4637, + "step": 2444 + }, + { + "epoch": 1.63, + "learning_rate": 1.27710821866496e-05, + "loss": 0.4851, + "step": 2445 + }, + { + "epoch": 1.63, + "learning_rate": 1.2765991471121655e-05, + "loss": 0.4761, + "step": 2446 + }, + { + "epoch": 1.63, + "learning_rate": 1.2760899979272977e-05, + "loss": 0.4816, + "step": 2447 + }, + { + "epoch": 1.63, + "learning_rate": 1.275580771253257e-05, + "loss": 0.4659, + "step": 2448 + }, + { + "epoch": 1.63, + "learning_rate": 1.2750714672329676e-05, + "loss": 0.463, + "step": 2449 + }, + { + "epoch": 1.63, + "learning_rate": 1.2745620860093726e-05, + "loss": 0.4985, + "step": 2450 + }, + { + "epoch": 1.63, + "learning_rate": 1.274052627725439e-05, + "loss": 0.4847, + "step": 2451 + }, + { + "epoch": 1.63, + "learning_rate": 1.2735430925241543e-05, + "loss": 0.4654, + "step": 2452 + }, + { + "epoch": 1.63, + "learning_rate": 1.273033480548528e-05, + "loss": 0.4958, + "step": 2453 + }, + { + "epoch": 1.63, + "learning_rate": 1.272523791941591e-05, + "loss": 0.5283, + "step": 2454 + }, + { + "epoch": 1.64, + "learning_rate": 1.2720140268463958e-05, + "loss": 0.4846, + "step": 2455 + }, + { + "epoch": 1.64, + "learning_rate": 1.2715041854060161e-05, + "loss": 0.502, + "step": 2456 + }, + { + "epoch": 1.64, + "learning_rate": 1.2709942677635474e-05, + "loss": 0.4987, + "step": 2457 + }, + { + "epoch": 1.64, + "learning_rate": 1.2704842740621061e-05, + "loss": 0.48, + "step": 2458 + }, + { + "epoch": 1.64, + "learning_rate": 1.2699742044448311e-05, + "loss": 0.45, + "step": 2459 + }, + { + "epoch": 1.64, + "learning_rate": 1.2694640590548808e-05, + "loss": 0.5029, + "step": 2460 + }, + { + "epoch": 1.64, + "learning_rate": 1.2689538380354367e-05, + "loss": 0.4948, + "step": 2461 + }, + { + "epoch": 1.64, + "learning_rate": 1.2684435415297e-05, + "loss": 0.5004, + "step": 2462 + }, + { + "epoch": 1.64, + "learning_rate": 1.267933169680894e-05, + "loss": 0.518, + "step": 2463 + }, + { + "epoch": 1.64, + "learning_rate": 1.2674227226322633e-05, + "loss": 0.4908, + "step": 2464 + }, + { + "epoch": 1.64, + "learning_rate": 1.2669122005270724e-05, + "loss": 0.4942, + "step": 2465 + }, + { + "epoch": 1.64, + "learning_rate": 1.2664016035086081e-05, + "loss": 0.4937, + "step": 2466 + }, + { + "epoch": 1.64, + "learning_rate": 1.2658909317201782e-05, + "loss": 0.512, + "step": 2467 + }, + { + "epoch": 1.64, + "learning_rate": 1.2653801853051105e-05, + "loss": 0.4871, + "step": 2468 + }, + { + "epoch": 1.64, + "learning_rate": 1.2648693644067545e-05, + "loss": 0.4573, + "step": 2469 + }, + { + "epoch": 1.65, + "learning_rate": 1.2643584691684802e-05, + "loss": 0.5149, + "step": 2470 + }, + { + "epoch": 1.65, + "learning_rate": 1.2638474997336794e-05, + "loss": 0.4781, + "step": 2471 + }, + { + "epoch": 1.65, + "learning_rate": 1.2633364562457632e-05, + "loss": 0.4893, + "step": 2472 + }, + { + "epoch": 1.65, + "learning_rate": 1.262825338848165e-05, + "loss": 0.4776, + "step": 2473 + }, + { + "epoch": 1.65, + "learning_rate": 1.262314147684338e-05, + "loss": 0.5026, + "step": 2474 + }, + { + "epoch": 1.65, + "learning_rate": 1.2618028828977563e-05, + "loss": 0.5018, + "step": 2475 + }, + { + "epoch": 1.65, + "learning_rate": 1.2612915446319148e-05, + "loss": 0.5081, + "step": 2476 + }, + { + "epoch": 1.65, + "learning_rate": 1.2607801330303287e-05, + "loss": 0.4972, + "step": 2477 + }, + { + "epoch": 1.65, + "learning_rate": 1.2602686482365343e-05, + "loss": 0.5139, + "step": 2478 + }, + { + "epoch": 1.65, + "learning_rate": 1.2597570903940879e-05, + "loss": 0.5296, + "step": 2479 + }, + { + "epoch": 1.65, + "learning_rate": 1.259245459646567e-05, + "loss": 0.4634, + "step": 2480 + }, + { + "epoch": 1.65, + "learning_rate": 1.258733756137569e-05, + "loss": 0.5005, + "step": 2481 + }, + { + "epoch": 1.65, + "learning_rate": 1.2582219800107113e-05, + "loss": 0.5058, + "step": 2482 + }, + { + "epoch": 1.65, + "learning_rate": 1.257710131409633e-05, + "loss": 0.4789, + "step": 2483 + }, + { + "epoch": 1.65, + "learning_rate": 1.2571982104779927e-05, + "loss": 0.497, + "step": 2484 + }, + { + "epoch": 1.66, + "learning_rate": 1.2566862173594689e-05, + "loss": 0.5065, + "step": 2485 + }, + { + "epoch": 1.66, + "learning_rate": 1.256174152197761e-05, + "loss": 0.526, + "step": 2486 + }, + { + "epoch": 1.66, + "learning_rate": 1.2556620151365886e-05, + "loss": 0.5361, + "step": 2487 + }, + { + "epoch": 1.66, + "learning_rate": 1.2551498063196914e-05, + "loss": 0.4911, + "step": 2488 + }, + { + "epoch": 1.66, + "learning_rate": 1.254637525890829e-05, + "loss": 0.4768, + "step": 2489 + }, + { + "epoch": 1.66, + "learning_rate": 1.2541251739937814e-05, + "loss": 0.4887, + "step": 2490 + }, + { + "epoch": 1.66, + "learning_rate": 1.2536127507723486e-05, + "loss": 0.4734, + "step": 2491 + }, + { + "epoch": 1.66, + "learning_rate": 1.2531002563703502e-05, + "loss": 0.453, + "step": 2492 + }, + { + "epoch": 1.66, + "learning_rate": 1.2525876909316263e-05, + "loss": 0.4697, + "step": 2493 + }, + { + "epoch": 1.66, + "learning_rate": 1.252075054600037e-05, + "loss": 0.4998, + "step": 2494 + }, + { + "epoch": 1.66, + "learning_rate": 1.2515623475194623e-05, + "loss": 0.4756, + "step": 2495 + }, + { + "epoch": 1.66, + "learning_rate": 1.251049569833801e-05, + "loss": 0.4939, + "step": 2496 + }, + { + "epoch": 1.66, + "learning_rate": 1.250536721686973e-05, + "loss": 0.5076, + "step": 2497 + }, + { + "epoch": 1.66, + "learning_rate": 1.2500238032229177e-05, + "loss": 0.5119, + "step": 2498 + }, + { + "epoch": 1.66, + "learning_rate": 1.2495108145855938e-05, + "loss": 0.512, + "step": 2499 + }, + { + "epoch": 1.67, + "learning_rate": 1.2489977559189796e-05, + "loss": 0.5, + "step": 2500 + }, + { + "epoch": 1.67, + "learning_rate": 1.2484846273670745e-05, + "loss": 0.514, + "step": 2501 + }, + { + "epoch": 1.67, + "learning_rate": 1.2479714290738953e-05, + "loss": 0.4768, + "step": 2502 + }, + { + "epoch": 1.67, + "learning_rate": 1.24745816118348e-05, + "loss": 0.4813, + "step": 2503 + }, + { + "epoch": 1.67, + "learning_rate": 1.2469448238398859e-05, + "loss": 0.4583, + "step": 2504 + }, + { + "epoch": 1.67, + "learning_rate": 1.2464314171871888e-05, + "loss": 0.5163, + "step": 2505 + }, + { + "epoch": 1.67, + "learning_rate": 1.2459179413694849e-05, + "loss": 0.4898, + "step": 2506 + }, + { + "epoch": 1.67, + "learning_rate": 1.24540439653089e-05, + "loss": 0.4777, + "step": 2507 + }, + { + "epoch": 1.67, + "learning_rate": 1.2448907828155383e-05, + "loss": 0.5537, + "step": 2508 + }, + { + "epoch": 1.67, + "learning_rate": 1.2443771003675842e-05, + "loss": 0.4661, + "step": 2509 + }, + { + "epoch": 1.67, + "learning_rate": 1.2438633493312016e-05, + "loss": 0.5147, + "step": 2510 + }, + { + "epoch": 1.67, + "learning_rate": 1.243349529850582e-05, + "loss": 0.4929, + "step": 2511 + }, + { + "epoch": 1.67, + "learning_rate": 1.2428356420699378e-05, + "loss": 0.4882, + "step": 2512 + }, + { + "epoch": 1.67, + "learning_rate": 1.2423216861335004e-05, + "loss": 0.5021, + "step": 2513 + }, + { + "epoch": 1.67, + "learning_rate": 1.2418076621855191e-05, + "loss": 0.4877, + "step": 2514 + }, + { + "epoch": 1.68, + "learning_rate": 1.241293570370264e-05, + "loss": 0.4847, + "step": 2515 + }, + { + "epoch": 1.68, + "learning_rate": 1.2407794108320229e-05, + "loss": 0.478, + "step": 2516 + }, + { + "epoch": 1.68, + "learning_rate": 1.2402651837151029e-05, + "loss": 0.5041, + "step": 2517 + }, + { + "epoch": 1.68, + "learning_rate": 1.2397508891638308e-05, + "loss": 0.4899, + "step": 2518 + }, + { + "epoch": 1.68, + "learning_rate": 1.2392365273225513e-05, + "loss": 0.4775, + "step": 2519 + }, + { + "epoch": 1.68, + "learning_rate": 1.2387220983356283e-05, + "loss": 0.4811, + "step": 2520 + }, + { + "epoch": 1.68, + "learning_rate": 1.2382076023474455e-05, + "loss": 0.5075, + "step": 2521 + }, + { + "epoch": 1.68, + "learning_rate": 1.2376930395024039e-05, + "loss": 0.4862, + "step": 2522 + }, + { + "epoch": 1.68, + "learning_rate": 1.2371784099449242e-05, + "loss": 0.5041, + "step": 2523 + }, + { + "epoch": 1.68, + "learning_rate": 1.236663713819446e-05, + "loss": 0.5246, + "step": 2524 + }, + { + "epoch": 1.68, + "learning_rate": 1.2361489512704264e-05, + "loss": 0.508, + "step": 2525 + }, + { + "epoch": 1.68, + "learning_rate": 1.2356341224423422e-05, + "loss": 0.4847, + "step": 2526 + }, + { + "epoch": 1.68, + "learning_rate": 1.235119227479689e-05, + "loss": 0.4775, + "step": 2527 + }, + { + "epoch": 1.68, + "learning_rate": 1.2346042665269799e-05, + "loss": 0.5094, + "step": 2528 + }, + { + "epoch": 1.68, + "learning_rate": 1.2340892397287475e-05, + "loss": 0.5002, + "step": 2529 + }, + { + "epoch": 1.69, + "learning_rate": 1.2335741472295426e-05, + "loss": 0.4888, + "step": 2530 + }, + { + "epoch": 1.69, + "learning_rate": 1.2330589891739338e-05, + "loss": 0.4987, + "step": 2531 + }, + { + "epoch": 1.69, + "learning_rate": 1.2325437657065089e-05, + "loss": 0.4593, + "step": 2532 + }, + { + "epoch": 1.69, + "learning_rate": 1.2320284769718739e-05, + "loss": 0.495, + "step": 2533 + }, + { + "epoch": 1.69, + "learning_rate": 1.2315131231146526e-05, + "loss": 0.4926, + "step": 2534 + }, + { + "epoch": 1.69, + "learning_rate": 1.230997704279488e-05, + "loss": 0.4855, + "step": 2535 + }, + { + "epoch": 1.69, + "learning_rate": 1.2304822206110409e-05, + "loss": 0.4989, + "step": 2536 + }, + { + "epoch": 1.69, + "learning_rate": 1.2299666722539897e-05, + "loss": 0.5409, + "step": 2537 + }, + { + "epoch": 1.69, + "learning_rate": 1.2294510593530318e-05, + "loss": 0.4749, + "step": 2538 + }, + { + "epoch": 1.69, + "learning_rate": 1.2289353820528825e-05, + "loss": 0.4767, + "step": 2539 + }, + { + "epoch": 1.69, + "learning_rate": 1.2284196404982746e-05, + "loss": 0.5033, + "step": 2540 + }, + { + "epoch": 1.69, + "learning_rate": 1.2279038348339595e-05, + "loss": 0.4805, + "step": 2541 + }, + { + "epoch": 1.69, + "learning_rate": 1.227387965204707e-05, + "loss": 0.4998, + "step": 2542 + }, + { + "epoch": 1.69, + "learning_rate": 1.226872031755304e-05, + "loss": 0.4781, + "step": 2543 + }, + { + "epoch": 1.7, + "learning_rate": 1.2263560346305556e-05, + "loss": 0.4978, + "step": 2544 + }, + { + "epoch": 1.7, + "learning_rate": 1.2258399739752848e-05, + "loss": 0.484, + "step": 2545 + }, + { + "epoch": 1.7, + "learning_rate": 1.2253238499343328e-05, + "loss": 0.4384, + "step": 2546 + }, + { + "epoch": 1.7, + "learning_rate": 1.2248076626525578e-05, + "loss": 0.462, + "step": 2547 + }, + { + "epoch": 1.7, + "learning_rate": 1.2242914122748363e-05, + "loss": 0.496, + "step": 2548 + }, + { + "epoch": 1.7, + "learning_rate": 1.223775098946063e-05, + "loss": 0.4909, + "step": 2549 + }, + { + "epoch": 1.7, + "learning_rate": 1.223258722811149e-05, + "loss": 0.4811, + "step": 2550 + }, + { + "epoch": 1.7, + "learning_rate": 1.2227422840150238e-05, + "loss": 0.5114, + "step": 2551 + }, + { + "epoch": 1.7, + "learning_rate": 1.2222257827026345e-05, + "loss": 0.4714, + "step": 2552 + }, + { + "epoch": 1.7, + "learning_rate": 1.221709219018946e-05, + "loss": 0.4699, + "step": 2553 + }, + { + "epoch": 1.7, + "learning_rate": 1.2211925931089394e-05, + "loss": 0.4859, + "step": 2554 + }, + { + "epoch": 1.7, + "learning_rate": 1.2206759051176151e-05, + "loss": 0.4844, + "step": 2555 + }, + { + "epoch": 1.7, + "learning_rate": 1.2201591551899898e-05, + "loss": 0.4592, + "step": 2556 + }, + { + "epoch": 1.7, + "learning_rate": 1.2196423434710978e-05, + "loss": 0.5196, + "step": 2557 + }, + { + "epoch": 1.7, + "learning_rate": 1.2191254701059904e-05, + "loss": 0.4786, + "step": 2558 + }, + { + "epoch": 1.71, + "learning_rate": 1.2186085352397374e-05, + "loss": 0.4673, + "step": 2559 + }, + { + "epoch": 1.71, + "learning_rate": 1.218091539017424e-05, + "loss": 0.4997, + "step": 2560 + }, + { + "epoch": 1.71, + "learning_rate": 1.2175744815841543e-05, + "loss": 0.5088, + "step": 2561 + }, + { + "epoch": 1.71, + "learning_rate": 1.2170573630850486e-05, + "loss": 0.476, + "step": 2562 + }, + { + "epoch": 1.71, + "learning_rate": 1.216540183665245e-05, + "loss": 0.4563, + "step": 2563 + }, + { + "epoch": 1.71, + "learning_rate": 1.2160229434698984e-05, + "loss": 0.4688, + "step": 2564 + }, + { + "epoch": 1.71, + "learning_rate": 1.2155056426441803e-05, + "loss": 0.5236, + "step": 2565 + }, + { + "epoch": 1.71, + "learning_rate": 1.2149882813332796e-05, + "loss": 0.486, + "step": 2566 + }, + { + "epoch": 1.71, + "learning_rate": 1.2144708596824027e-05, + "loss": 0.4992, + "step": 2567 + }, + { + "epoch": 1.71, + "learning_rate": 1.213953377836772e-05, + "loss": 0.5141, + "step": 2568 + }, + { + "epoch": 1.71, + "learning_rate": 1.2134358359416277e-05, + "loss": 0.4961, + "step": 2569 + }, + { + "epoch": 1.71, + "learning_rate": 1.212918234142226e-05, + "loss": 0.4442, + "step": 2570 + }, + { + "epoch": 1.71, + "learning_rate": 1.2124005725838404e-05, + "loss": 0.5016, + "step": 2571 + }, + { + "epoch": 1.71, + "learning_rate": 1.2118828514117611e-05, + "loss": 0.5002, + "step": 2572 + }, + { + "epoch": 1.71, + "learning_rate": 1.2113650707712952e-05, + "loss": 0.486, + "step": 2573 + }, + { + "epoch": 1.72, + "learning_rate": 1.2108472308077661e-05, + "loss": 0.4673, + "step": 2574 + }, + { + "epoch": 1.72, + "learning_rate": 1.2103293316665137e-05, + "loss": 0.4508, + "step": 2575 + }, + { + "epoch": 1.72, + "learning_rate": 1.2098113734928957e-05, + "loss": 0.4631, + "step": 2576 + }, + { + "epoch": 1.72, + "learning_rate": 1.209293356432285e-05, + "loss": 0.4737, + "step": 2577 + }, + { + "epoch": 1.72, + "learning_rate": 1.2087752806300715e-05, + "loss": 0.4912, + "step": 2578 + }, + { + "epoch": 1.72, + "learning_rate": 1.208257146231662e-05, + "loss": 0.4755, + "step": 2579 + }, + { + "epoch": 1.72, + "learning_rate": 1.2077389533824789e-05, + "loss": 0.5224, + "step": 2580 + }, + { + "epoch": 1.72, + "learning_rate": 1.2072207022279618e-05, + "loss": 0.4868, + "step": 2581 + }, + { + "epoch": 1.72, + "learning_rate": 1.2067023929135664e-05, + "loss": 0.5011, + "step": 2582 + }, + { + "epoch": 1.72, + "learning_rate": 1.2061840255847645e-05, + "loss": 0.5079, + "step": 2583 + }, + { + "epoch": 1.72, + "learning_rate": 1.2056656003870448e-05, + "loss": 0.4912, + "step": 2584 + }, + { + "epoch": 1.72, + "learning_rate": 1.2051471174659116e-05, + "loss": 0.5013, + "step": 2585 + }, + { + "epoch": 1.72, + "learning_rate": 1.204628576966885e-05, + "loss": 0.4873, + "step": 2586 + }, + { + "epoch": 1.72, + "learning_rate": 1.2041099790355025e-05, + "loss": 0.4853, + "step": 2587 + }, + { + "epoch": 1.72, + "learning_rate": 1.203591323817317e-05, + "loss": 0.481, + "step": 2588 + }, + { + "epoch": 1.73, + "learning_rate": 1.2030726114578974e-05, + "loss": 0.5212, + "step": 2589 + }, + { + "epoch": 1.73, + "learning_rate": 1.2025538421028293e-05, + "loss": 0.4934, + "step": 2590 + }, + { + "epoch": 1.73, + "learning_rate": 1.2020350158977131e-05, + "loss": 0.5001, + "step": 2591 + }, + { + "epoch": 1.73, + "learning_rate": 1.2015161329881663e-05, + "loss": 0.4796, + "step": 2592 + }, + { + "epoch": 1.73, + "learning_rate": 1.200997193519822e-05, + "loss": 0.4958, + "step": 2593 + }, + { + "epoch": 1.73, + "learning_rate": 1.2004781976383286e-05, + "loss": 0.5291, + "step": 2594 + }, + { + "epoch": 1.73, + "learning_rate": 1.1999591454893511e-05, + "loss": 0.5025, + "step": 2595 + }, + { + "epoch": 1.73, + "learning_rate": 1.1994400372185698e-05, + "loss": 0.4827, + "step": 2596 + }, + { + "epoch": 1.73, + "learning_rate": 1.1989208729716808e-05, + "loss": 0.4829, + "step": 2597 + }, + { + "epoch": 1.73, + "learning_rate": 1.1984016528943962e-05, + "loss": 0.485, + "step": 2598 + }, + { + "epoch": 1.73, + "learning_rate": 1.1978823771324439e-05, + "loss": 0.5281, + "step": 2599 + }, + { + "epoch": 1.73, + "learning_rate": 1.1973630458315667e-05, + "loss": 0.4634, + "step": 2600 + }, + { + "epoch": 1.73, + "learning_rate": 1.1968436591375236e-05, + "loss": 0.4909, + "step": 2601 + }, + { + "epoch": 1.73, + "learning_rate": 1.196324217196089e-05, + "loss": 0.5222, + "step": 2602 + }, + { + "epoch": 1.73, + "learning_rate": 1.1958047201530527e-05, + "loss": 0.4826, + "step": 2603 + }, + { + "epoch": 1.74, + "learning_rate": 1.1952851681542201e-05, + "loss": 0.4825, + "step": 2604 + }, + { + "epoch": 1.74, + "learning_rate": 1.194765561345412e-05, + "loss": 0.475, + "step": 2605 + }, + { + "epoch": 1.74, + "learning_rate": 1.1942458998724642e-05, + "loss": 0.5081, + "step": 2606 + }, + { + "epoch": 1.74, + "learning_rate": 1.1937261838812286e-05, + "loss": 0.4609, + "step": 2607 + }, + { + "epoch": 1.74, + "learning_rate": 1.1932064135175723e-05, + "loss": 0.5037, + "step": 2608 + }, + { + "epoch": 1.74, + "learning_rate": 1.1926865889273762e-05, + "loss": 0.4807, + "step": 2609 + }, + { + "epoch": 1.74, + "learning_rate": 1.1921667102565384e-05, + "loss": 0.492, + "step": 2610 + }, + { + "epoch": 1.74, + "learning_rate": 1.1916467776509717e-05, + "loss": 0.5251, + "step": 2611 + }, + { + "epoch": 1.74, + "learning_rate": 1.1911267912566027e-05, + "loss": 0.4634, + "step": 2612 + }, + { + "epoch": 1.74, + "learning_rate": 1.1906067512193748e-05, + "loss": 0.4855, + "step": 2613 + }, + { + "epoch": 1.74, + "learning_rate": 1.190086657685246e-05, + "loss": 0.4897, + "step": 2614 + }, + { + "epoch": 1.74, + "learning_rate": 1.1895665108001879e-05, + "loss": 0.4853, + "step": 2615 + }, + { + "epoch": 1.74, + "learning_rate": 1.1890463107101891e-05, + "loss": 0.4809, + "step": 2616 + }, + { + "epoch": 1.74, + "learning_rate": 1.188526057561252e-05, + "loss": 0.5015, + "step": 2617 + }, + { + "epoch": 1.74, + "learning_rate": 1.1880057514993944e-05, + "loss": 0.5009, + "step": 2618 + }, + { + "epoch": 1.75, + "learning_rate": 1.1874853926706486e-05, + "loss": 0.4776, + "step": 2619 + }, + { + "epoch": 1.75, + "learning_rate": 1.1869649812210618e-05, + "loss": 0.5275, + "step": 2620 + }, + { + "epoch": 1.75, + "learning_rate": 1.1864445172966956e-05, + "loss": 0.4887, + "step": 2621 + }, + { + "epoch": 1.75, + "learning_rate": 1.1859240010436272e-05, + "loss": 0.5075, + "step": 2622 + }, + { + "epoch": 1.75, + "learning_rate": 1.185403432607948e-05, + "loss": 0.4988, + "step": 2623 + }, + { + "epoch": 1.75, + "learning_rate": 1.1848828121357637e-05, + "loss": 0.4991, + "step": 2624 + }, + { + "epoch": 1.75, + "learning_rate": 1.1843621397731954e-05, + "loss": 0.5394, + "step": 2625 + }, + { + "epoch": 1.75, + "learning_rate": 1.1838414156663778e-05, + "loss": 0.5116, + "step": 2626 + }, + { + "epoch": 1.75, + "learning_rate": 1.183320639961461e-05, + "loss": 0.4602, + "step": 2627 + }, + { + "epoch": 1.75, + "learning_rate": 1.1827998128046091e-05, + "loss": 0.4982, + "step": 2628 + }, + { + "epoch": 1.75, + "learning_rate": 1.1822789343420008e-05, + "loss": 0.4962, + "step": 2629 + }, + { + "epoch": 1.75, + "learning_rate": 1.1817580047198287e-05, + "loss": 0.4969, + "step": 2630 + }, + { + "epoch": 1.75, + "learning_rate": 1.1812370240843012e-05, + "loss": 0.4923, + "step": 2631 + }, + { + "epoch": 1.75, + "learning_rate": 1.1807159925816389e-05, + "loss": 0.5061, + "step": 2632 + }, + { + "epoch": 1.75, + "learning_rate": 1.1801949103580788e-05, + "loss": 0.5, + "step": 2633 + }, + { + "epoch": 1.76, + "learning_rate": 1.1796737775598708e-05, + "loss": 0.5, + "step": 2634 + }, + { + "epoch": 1.76, + "learning_rate": 1.179152594333279e-05, + "loss": 0.5057, + "step": 2635 + }, + { + "epoch": 1.76, + "learning_rate": 1.1786313608245823e-05, + "loss": 0.4722, + "step": 2636 + }, + { + "epoch": 1.76, + "learning_rate": 1.1781100771800733e-05, + "loss": 0.4782, + "step": 2637 + }, + { + "epoch": 1.76, + "learning_rate": 1.1775887435460588e-05, + "loss": 0.502, + "step": 2638 + }, + { + "epoch": 1.76, + "learning_rate": 1.17706736006886e-05, + "loss": 0.4728, + "step": 2639 + }, + { + "epoch": 1.76, + "learning_rate": 1.1765459268948111e-05, + "loss": 0.4592, + "step": 2640 + }, + { + "epoch": 1.76, + "learning_rate": 1.1760244441702612e-05, + "loss": 0.4893, + "step": 2641 + }, + { + "epoch": 1.76, + "learning_rate": 1.1755029120415728e-05, + "loss": 0.4818, + "step": 2642 + }, + { + "epoch": 1.76, + "learning_rate": 1.1749813306551221e-05, + "loss": 0.4849, + "step": 2643 + }, + { + "epoch": 1.76, + "learning_rate": 1.1744597001573002e-05, + "loss": 0.5335, + "step": 2644 + }, + { + "epoch": 1.76, + "learning_rate": 1.1739380206945108e-05, + "loss": 0.4826, + "step": 2645 + }, + { + "epoch": 1.76, + "learning_rate": 1.1734162924131719e-05, + "loss": 0.4944, + "step": 2646 + }, + { + "epoch": 1.76, + "learning_rate": 1.1728945154597149e-05, + "loss": 0.4772, + "step": 2647 + }, + { + "epoch": 1.77, + "learning_rate": 1.1723726899805851e-05, + "loss": 0.4972, + "step": 2648 + }, + { + "epoch": 1.77, + "learning_rate": 1.1718508161222415e-05, + "loss": 0.4766, + "step": 2649 + }, + { + "epoch": 1.77, + "learning_rate": 1.1713288940311562e-05, + "loss": 0.4663, + "step": 2650 + }, + { + "epoch": 1.77, + "learning_rate": 1.1708069238538153e-05, + "loss": 0.4894, + "step": 2651 + }, + { + "epoch": 1.77, + "learning_rate": 1.1702849057367185e-05, + "loss": 0.468, + "step": 2652 + }, + { + "epoch": 1.77, + "learning_rate": 1.1697628398263785e-05, + "loss": 0.5206, + "step": 2653 + }, + { + "epoch": 1.77, + "learning_rate": 1.1692407262693219e-05, + "loss": 0.4589, + "step": 2654 + }, + { + "epoch": 1.77, + "learning_rate": 1.168718565212088e-05, + "loss": 0.496, + "step": 2655 + }, + { + "epoch": 1.77, + "learning_rate": 1.16819635680123e-05, + "loss": 0.4686, + "step": 2656 + }, + { + "epoch": 1.77, + "learning_rate": 1.1676741011833142e-05, + "loss": 0.4786, + "step": 2657 + }, + { + "epoch": 1.77, + "learning_rate": 1.1671517985049204e-05, + "loss": 0.4619, + "step": 2658 + }, + { + "epoch": 1.77, + "learning_rate": 1.1666294489126411e-05, + "loss": 0.5025, + "step": 2659 + }, + { + "epoch": 1.77, + "learning_rate": 1.1661070525530827e-05, + "loss": 0.4791, + "step": 2660 + }, + { + "epoch": 1.77, + "learning_rate": 1.1655846095728638e-05, + "loss": 0.4788, + "step": 2661 + }, + { + "epoch": 1.77, + "learning_rate": 1.1650621201186165e-05, + "loss": 0.4947, + "step": 2662 + }, + { + "epoch": 1.78, + "learning_rate": 1.1645395843369867e-05, + "loss": 0.4753, + "step": 2663 + }, + { + "epoch": 1.78, + "learning_rate": 1.1640170023746315e-05, + "loss": 0.4809, + "step": 2664 + }, + { + "epoch": 1.78, + "learning_rate": 1.1634943743782235e-05, + "loss": 0.4601, + "step": 2665 + }, + { + "epoch": 1.78, + "learning_rate": 1.1629717004944455e-05, + "loss": 0.4867, + "step": 2666 + }, + { + "epoch": 1.78, + "learning_rate": 1.162448980869995e-05, + "loss": 0.5017, + "step": 2667 + }, + { + "epoch": 1.78, + "learning_rate": 1.161926215651582e-05, + "loss": 0.461, + "step": 2668 + }, + { + "epoch": 1.78, + "learning_rate": 1.1614034049859289e-05, + "loss": 0.4891, + "step": 2669 + }, + { + "epoch": 1.78, + "learning_rate": 1.160880549019771e-05, + "loss": 0.4865, + "step": 2670 + }, + { + "epoch": 1.78, + "learning_rate": 1.1603576478998561e-05, + "loss": 0.4866, + "step": 2671 + }, + { + "epoch": 1.78, + "learning_rate": 1.1598347017729457e-05, + "loss": 0.4877, + "step": 2672 + }, + { + "epoch": 1.78, + "learning_rate": 1.1593117107858127e-05, + "loss": 0.5105, + "step": 2673 + }, + { + "epoch": 1.78, + "learning_rate": 1.1587886750852434e-05, + "loss": 0.4858, + "step": 2674 + }, + { + "epoch": 1.78, + "learning_rate": 1.1582655948180357e-05, + "loss": 0.4396, + "step": 2675 + }, + { + "epoch": 1.78, + "learning_rate": 1.157742470131001e-05, + "loss": 0.5135, + "step": 2676 + }, + { + "epoch": 1.78, + "learning_rate": 1.1572193011709627e-05, + "loss": 0.4782, + "step": 2677 + }, + { + "epoch": 1.79, + "learning_rate": 1.1566960880847572e-05, + "loss": 0.5253, + "step": 2678 + }, + { + "epoch": 1.79, + "learning_rate": 1.156172831019232e-05, + "loss": 0.4964, + "step": 2679 + }, + { + "epoch": 1.79, + "learning_rate": 1.1556495301212485e-05, + "loss": 0.4945, + "step": 2680 + }, + { + "epoch": 1.79, + "learning_rate": 1.1551261855376792e-05, + "loss": 0.5082, + "step": 2681 + }, + { + "epoch": 1.79, + "learning_rate": 1.1546027974154095e-05, + "loss": 0.4884, + "step": 2682 + }, + { + "epoch": 1.79, + "learning_rate": 1.154079365901337e-05, + "loss": 0.4731, + "step": 2683 + }, + { + "epoch": 1.79, + "learning_rate": 1.1535558911423706e-05, + "loss": 0.4869, + "step": 2684 + }, + { + "epoch": 1.79, + "learning_rate": 1.1530323732854326e-05, + "loss": 0.5016, + "step": 2685 + }, + { + "epoch": 1.79, + "learning_rate": 1.152508812477457e-05, + "loss": 0.5469, + "step": 2686 + }, + { + "epoch": 1.79, + "learning_rate": 1.1519852088653895e-05, + "loss": 0.5322, + "step": 2687 + }, + { + "epoch": 1.79, + "learning_rate": 1.1514615625961877e-05, + "loss": 0.4848, + "step": 2688 + }, + { + "epoch": 1.79, + "learning_rate": 1.1509378738168224e-05, + "loss": 0.4862, + "step": 2689 + }, + { + "epoch": 1.79, + "learning_rate": 1.1504141426742744e-05, + "loss": 0.4539, + "step": 2690 + }, + { + "epoch": 1.79, + "learning_rate": 1.149890369315538e-05, + "loss": 0.5259, + "step": 2691 + }, + { + "epoch": 1.79, + "learning_rate": 1.1493665538876182e-05, + "loss": 0.5112, + "step": 2692 + }, + { + "epoch": 1.8, + "learning_rate": 1.148842696537533e-05, + "loss": 0.5013, + "step": 2693 + }, + { + "epoch": 1.8, + "learning_rate": 1.1483187974123116e-05, + "loss": 0.5112, + "step": 2694 + }, + { + "epoch": 1.8, + "learning_rate": 1.147794856658994e-05, + "loss": 0.4677, + "step": 2695 + }, + { + "epoch": 1.8, + "learning_rate": 1.1472708744246334e-05, + "loss": 0.5068, + "step": 2696 + }, + { + "epoch": 1.8, + "learning_rate": 1.1467468508562943e-05, + "loss": 0.5124, + "step": 2697 + }, + { + "epoch": 1.8, + "learning_rate": 1.1462227861010513e-05, + "loss": 0.478, + "step": 2698 + }, + { + "epoch": 1.8, + "learning_rate": 1.1456986803059927e-05, + "loss": 0.4768, + "step": 2699 + }, + { + "epoch": 1.8, + "learning_rate": 1.1451745336182173e-05, + "loss": 0.4729, + "step": 2700 + }, + { + "epoch": 1.8, + "learning_rate": 1.1446503461848353e-05, + "loss": 0.4853, + "step": 2701 + }, + { + "epoch": 1.8, + "learning_rate": 1.1441261181529684e-05, + "loss": 0.5183, + "step": 2702 + }, + { + "epoch": 1.8, + "learning_rate": 1.14360184966975e-05, + "loss": 0.4719, + "step": 2703 + }, + { + "epoch": 1.8, + "learning_rate": 1.1430775408823243e-05, + "loss": 0.5446, + "step": 2704 + }, + { + "epoch": 1.8, + "learning_rate": 1.1425531919378469e-05, + "loss": 0.5112, + "step": 2705 + }, + { + "epoch": 1.8, + "learning_rate": 1.1420288029834855e-05, + "loss": 0.4901, + "step": 2706 + }, + { + "epoch": 1.8, + "learning_rate": 1.1415043741664184e-05, + "loss": 0.5144, + "step": 2707 + }, + { + "epoch": 1.81, + "learning_rate": 1.140979905633835e-05, + "loss": 0.4611, + "step": 2708 + }, + { + "epoch": 1.81, + "learning_rate": 1.1404553975329357e-05, + "loss": 0.533, + "step": 2709 + }, + { + "epoch": 1.81, + "learning_rate": 1.1399308500109326e-05, + "loss": 0.4642, + "step": 2710 + }, + { + "epoch": 1.81, + "learning_rate": 1.1394062632150483e-05, + "loss": 0.4878, + "step": 2711 + }, + { + "epoch": 1.81, + "learning_rate": 1.1388816372925166e-05, + "loss": 0.5049, + "step": 2712 + }, + { + "epoch": 1.81, + "learning_rate": 1.138356972390583e-05, + "loss": 0.4818, + "step": 2713 + }, + { + "epoch": 1.81, + "learning_rate": 1.1378322686565026e-05, + "loss": 0.5138, + "step": 2714 + }, + { + "epoch": 1.81, + "learning_rate": 1.1373075262375421e-05, + "loss": 0.4992, + "step": 2715 + }, + { + "epoch": 1.81, + "learning_rate": 1.1367827452809794e-05, + "loss": 0.487, + "step": 2716 + }, + { + "epoch": 1.81, + "learning_rate": 1.1362579259341029e-05, + "loss": 0.4777, + "step": 2717 + }, + { + "epoch": 1.81, + "learning_rate": 1.1357330683442114e-05, + "loss": 0.4653, + "step": 2718 + }, + { + "epoch": 1.81, + "learning_rate": 1.1352081726586144e-05, + "loss": 0.4782, + "step": 2719 + }, + { + "epoch": 1.81, + "learning_rate": 1.1346832390246334e-05, + "loss": 0.4701, + "step": 2720 + }, + { + "epoch": 1.81, + "learning_rate": 1.134158267589599e-05, + "loss": 0.5024, + "step": 2721 + }, + { + "epoch": 1.81, + "learning_rate": 1.1336332585008532e-05, + "loss": 0.501, + "step": 2722 + }, + { + "epoch": 1.82, + "learning_rate": 1.1331082119057485e-05, + "loss": 0.5276, + "step": 2723 + }, + { + "epoch": 1.82, + "learning_rate": 1.1325831279516475e-05, + "loss": 0.5013, + "step": 2724 + }, + { + "epoch": 1.82, + "learning_rate": 1.132058006785924e-05, + "loss": 0.4997, + "step": 2725 + }, + { + "epoch": 1.82, + "learning_rate": 1.131532848555961e-05, + "loss": 0.4982, + "step": 2726 + }, + { + "epoch": 1.82, + "learning_rate": 1.1310076534091539e-05, + "loss": 0.4904, + "step": 2727 + }, + { + "epoch": 1.82, + "learning_rate": 1.1304824214929064e-05, + "loss": 0.5161, + "step": 2728 + }, + { + "epoch": 1.82, + "learning_rate": 1.1299571529546342e-05, + "loss": 0.4886, + "step": 2729 + }, + { + "epoch": 1.82, + "learning_rate": 1.1294318479417618e-05, + "loss": 0.508, + "step": 2730 + }, + { + "epoch": 1.82, + "learning_rate": 1.1289065066017249e-05, + "loss": 0.4876, + "step": 2731 + }, + { + "epoch": 1.82, + "learning_rate": 1.1283811290819693e-05, + "loss": 0.4646, + "step": 2732 + }, + { + "epoch": 1.82, + "learning_rate": 1.1278557155299506e-05, + "loss": 0.5344, + "step": 2733 + }, + { + "epoch": 1.82, + "learning_rate": 1.1273302660931345e-05, + "loss": 0.522, + "step": 2734 + }, + { + "epoch": 1.82, + "learning_rate": 1.1268047809189976e-05, + "loss": 0.4928, + "step": 2735 + }, + { + "epoch": 1.82, + "learning_rate": 1.1262792601550254e-05, + "loss": 0.4954, + "step": 2736 + }, + { + "epoch": 1.83, + "learning_rate": 1.1257537039487141e-05, + "loss": 0.498, + "step": 2737 + }, + { + "epoch": 1.83, + "learning_rate": 1.1252281124475695e-05, + "loss": 0.5206, + "step": 2738 + }, + { + "epoch": 1.83, + "learning_rate": 1.1247024857991075e-05, + "loss": 0.4763, + "step": 2739 + }, + { + "epoch": 1.83, + "learning_rate": 1.1241768241508537e-05, + "loss": 0.4737, + "step": 2740 + }, + { + "epoch": 1.83, + "learning_rate": 1.123651127650344e-05, + "loss": 0.4942, + "step": 2741 + }, + { + "epoch": 1.83, + "learning_rate": 1.1231253964451235e-05, + "loss": 0.4861, + "step": 2742 + }, + { + "epoch": 1.83, + "learning_rate": 1.1225996306827471e-05, + "loss": 0.4762, + "step": 2743 + }, + { + "epoch": 1.83, + "learning_rate": 1.12207383051078e-05, + "loss": 0.4883, + "step": 2744 + }, + { + "epoch": 1.83, + "learning_rate": 1.1215479960767958e-05, + "loss": 0.5143, + "step": 2745 + }, + { + "epoch": 1.83, + "learning_rate": 1.1210221275283794e-05, + "loss": 0.4959, + "step": 2746 + }, + { + "epoch": 1.83, + "learning_rate": 1.120496225013124e-05, + "loss": 0.5203, + "step": 2747 + }, + { + "epoch": 1.83, + "learning_rate": 1.1199702886786327e-05, + "loss": 0.4639, + "step": 2748 + }, + { + "epoch": 1.83, + "learning_rate": 1.1194443186725186e-05, + "loss": 0.5363, + "step": 2749 + }, + { + "epoch": 1.83, + "learning_rate": 1.118918315142403e-05, + "loss": 0.5006, + "step": 2750 + }, + { + "epoch": 1.83, + "learning_rate": 1.118392278235918e-05, + "loss": 0.5088, + "step": 2751 + }, + { + "epoch": 1.84, + "learning_rate": 1.1178662081007044e-05, + "loss": 0.4637, + "step": 2752 + }, + { + "epoch": 1.84, + "learning_rate": 1.117340104884412e-05, + "loss": 0.4974, + "step": 2753 + }, + { + "epoch": 1.84, + "learning_rate": 1.1168139687347003e-05, + "loss": 0.4883, + "step": 2754 + }, + { + "epoch": 1.84, + "learning_rate": 1.1162877997992389e-05, + "loss": 0.5157, + "step": 2755 + }, + { + "epoch": 1.84, + "learning_rate": 1.1157615982257047e-05, + "loss": 0.5178, + "step": 2756 + }, + { + "epoch": 1.84, + "learning_rate": 1.115235364161785e-05, + "loss": 0.5356, + "step": 2757 + }, + { + "epoch": 1.84, + "learning_rate": 1.1147090977551764e-05, + "loss": 0.4933, + "step": 2758 + }, + { + "epoch": 1.84, + "learning_rate": 1.114182799153584e-05, + "loss": 0.5038, + "step": 2759 + }, + { + "epoch": 1.84, + "learning_rate": 1.1136564685047213e-05, + "loss": 0.472, + "step": 2760 + }, + { + "epoch": 1.84, + "learning_rate": 1.1131301059563129e-05, + "loss": 0.4746, + "step": 2761 + }, + { + "epoch": 1.84, + "learning_rate": 1.1126037116560905e-05, + "loss": 0.469, + "step": 2762 + }, + { + "epoch": 1.84, + "learning_rate": 1.1120772857517947e-05, + "loss": 0.4833, + "step": 2763 + }, + { + "epoch": 1.84, + "learning_rate": 1.1115508283911767e-05, + "loss": 0.4831, + "step": 2764 + }, + { + "epoch": 1.84, + "learning_rate": 1.1110243397219945e-05, + "loss": 0.4783, + "step": 2765 + }, + { + "epoch": 1.84, + "learning_rate": 1.1104978198920158e-05, + "loss": 0.4802, + "step": 2766 + }, + { + "epoch": 1.85, + "learning_rate": 1.1099712690490172e-05, + "loss": 0.4894, + "step": 2767 + }, + { + "epoch": 1.85, + "learning_rate": 1.1094446873407838e-05, + "loss": 0.502, + "step": 2768 + }, + { + "epoch": 1.85, + "learning_rate": 1.1089180749151098e-05, + "loss": 0.5038, + "step": 2769 + }, + { + "epoch": 1.85, + "learning_rate": 1.1083914319197967e-05, + "loss": 0.5063, + "step": 2770 + }, + { + "epoch": 1.85, + "learning_rate": 1.107864758502656e-05, + "loss": 0.4997, + "step": 2771 + }, + { + "epoch": 1.85, + "learning_rate": 1.1073380548115074e-05, + "loss": 0.4957, + "step": 2772 + }, + { + "epoch": 1.85, + "learning_rate": 1.106811320994178e-05, + "loss": 0.4841, + "step": 2773 + }, + { + "epoch": 1.85, + "learning_rate": 1.1062845571985052e-05, + "loss": 0.4778, + "step": 2774 + }, + { + "epoch": 1.85, + "learning_rate": 1.1057577635723337e-05, + "loss": 0.4859, + "step": 2775 + }, + { + "epoch": 1.85, + "learning_rate": 1.1052309402635164e-05, + "loss": 0.4793, + "step": 2776 + }, + { + "epoch": 1.85, + "learning_rate": 1.1047040874199151e-05, + "loss": 0.4921, + "step": 2777 + }, + { + "epoch": 1.85, + "learning_rate": 1.1041772051894e-05, + "loss": 0.4774, + "step": 2778 + }, + { + "epoch": 1.85, + "learning_rate": 1.1036502937198484e-05, + "loss": 0.5116, + "step": 2779 + }, + { + "epoch": 1.85, + "learning_rate": 1.1031233531591471e-05, + "loss": 0.4943, + "step": 2780 + }, + { + "epoch": 1.85, + "learning_rate": 1.1025963836551907e-05, + "loss": 0.4552, + "step": 2781 + }, + { + "epoch": 1.86, + "learning_rate": 1.1020693853558815e-05, + "loss": 0.4817, + "step": 2782 + }, + { + "epoch": 1.86, + "learning_rate": 1.1015423584091306e-05, + "loss": 0.4796, + "step": 2783 + }, + { + "epoch": 1.86, + "learning_rate": 1.1010153029628563e-05, + "loss": 0.4945, + "step": 2784 + }, + { + "epoch": 1.86, + "learning_rate": 1.1004882191649857e-05, + "loss": 0.4959, + "step": 2785 + }, + { + "epoch": 1.86, + "learning_rate": 1.099961107163453e-05, + "loss": 0.5319, + "step": 2786 + }, + { + "epoch": 1.86, + "learning_rate": 1.0994339671062012e-05, + "loss": 0.5032, + "step": 2787 + }, + { + "epoch": 1.86, + "learning_rate": 1.0989067991411808e-05, + "loss": 0.4865, + "step": 2788 + }, + { + "epoch": 1.86, + "learning_rate": 1.09837960341635e-05, + "loss": 0.4712, + "step": 2789 + }, + { + "epoch": 1.86, + "learning_rate": 1.0978523800796747e-05, + "loss": 0.5002, + "step": 2790 + }, + { + "epoch": 1.86, + "learning_rate": 1.0973251292791292e-05, + "loss": 0.5202, + "step": 2791 + }, + { + "epoch": 1.86, + "learning_rate": 1.0967978511626947e-05, + "loss": 0.4463, + "step": 2792 + }, + { + "epoch": 1.86, + "learning_rate": 1.0962705458783605e-05, + "loss": 0.498, + "step": 2793 + }, + { + "epoch": 1.86, + "learning_rate": 1.095743213574123e-05, + "loss": 0.4906, + "step": 2794 + }, + { + "epoch": 1.86, + "learning_rate": 1.0952158543979878e-05, + "loss": 0.4979, + "step": 2795 + }, + { + "epoch": 1.86, + "learning_rate": 1.0946884684979659e-05, + "loss": 0.483, + "step": 2796 + }, + { + "epoch": 1.87, + "learning_rate": 1.094161056022077e-05, + "loss": 0.4596, + "step": 2797 + }, + { + "epoch": 1.87, + "learning_rate": 1.0936336171183484e-05, + "loss": 0.497, + "step": 2798 + }, + { + "epoch": 1.87, + "learning_rate": 1.0931061519348139e-05, + "loss": 0.5555, + "step": 2799 + }, + { + "epoch": 1.87, + "learning_rate": 1.0925786606195153e-05, + "loss": 0.4815, + "step": 2800 + }, + { + "epoch": 1.87, + "learning_rate": 1.092051143320502e-05, + "loss": 0.4933, + "step": 2801 + }, + { + "epoch": 1.87, + "learning_rate": 1.09152360018583e-05, + "loss": 0.5072, + "step": 2802 + }, + { + "epoch": 1.87, + "learning_rate": 1.0909960313635632e-05, + "loss": 0.4998, + "step": 2803 + }, + { + "epoch": 1.87, + "learning_rate": 1.0904684370017726e-05, + "loss": 0.4853, + "step": 2804 + }, + { + "epoch": 1.87, + "learning_rate": 1.0899408172485357e-05, + "loss": 0.4921, + "step": 2805 + }, + { + "epoch": 1.87, + "learning_rate": 1.0894131722519376e-05, + "loss": 0.4851, + "step": 2806 + }, + { + "epoch": 1.87, + "learning_rate": 1.0888855021600711e-05, + "loss": 0.4909, + "step": 2807 + }, + { + "epoch": 1.87, + "learning_rate": 1.0883578071210348e-05, + "loss": 0.5264, + "step": 2808 + }, + { + "epoch": 1.87, + "learning_rate": 1.087830087282935e-05, + "loss": 0.4648, + "step": 2809 + }, + { + "epoch": 1.87, + "learning_rate": 1.0873023427938855e-05, + "loss": 0.4745, + "step": 2810 + }, + { + "epoch": 1.87, + "learning_rate": 1.0867745738020058e-05, + "loss": 0.4899, + "step": 2811 + }, + { + "epoch": 1.88, + "learning_rate": 1.0862467804554231e-05, + "loss": 0.4364, + "step": 2812 + }, + { + "epoch": 1.88, + "learning_rate": 1.0857189629022713e-05, + "loss": 0.496, + "step": 2813 + }, + { + "epoch": 1.88, + "learning_rate": 1.0851911212906909e-05, + "loss": 0.4562, + "step": 2814 + }, + { + "epoch": 1.88, + "learning_rate": 1.0846632557688295e-05, + "loss": 0.4876, + "step": 2815 + }, + { + "epoch": 1.88, + "learning_rate": 1.0841353664848406e-05, + "loss": 0.4894, + "step": 2816 + }, + { + "epoch": 1.88, + "learning_rate": 1.0836074535868857e-05, + "loss": 0.5028, + "step": 2817 + }, + { + "epoch": 1.88, + "learning_rate": 1.0830795172231322e-05, + "loss": 0.5217, + "step": 2818 + }, + { + "epoch": 1.88, + "learning_rate": 1.082551557541753e-05, + "loss": 0.4886, + "step": 2819 + }, + { + "epoch": 1.88, + "learning_rate": 1.08202357469093e-05, + "loss": 0.485, + "step": 2820 + }, + { + "epoch": 1.88, + "learning_rate": 1.081495568818849e-05, + "loss": 0.4923, + "step": 2821 + }, + { + "epoch": 1.88, + "learning_rate": 1.0809675400737045e-05, + "loss": 0.4748, + "step": 2822 + }, + { + "epoch": 1.88, + "learning_rate": 1.0804394886036959e-05, + "loss": 0.5513, + "step": 2823 + }, + { + "epoch": 1.88, + "learning_rate": 1.0799114145570298e-05, + "loss": 0.4667, + "step": 2824 + }, + { + "epoch": 1.88, + "learning_rate": 1.0793833180819183e-05, + "loss": 0.5125, + "step": 2825 + }, + { + "epoch": 1.88, + "learning_rate": 1.0788551993265804e-05, + "loss": 0.4919, + "step": 2826 + }, + { + "epoch": 1.89, + "learning_rate": 1.0783270584392418e-05, + "loss": 0.4575, + "step": 2827 + }, + { + "epoch": 1.89, + "learning_rate": 1.0777988955681331e-05, + "loss": 0.4957, + "step": 2828 + }, + { + "epoch": 1.89, + "learning_rate": 1.0772707108614923e-05, + "loss": 0.5028, + "step": 2829 + }, + { + "epoch": 1.89, + "learning_rate": 1.0767425044675634e-05, + "loss": 0.5076, + "step": 2830 + }, + { + "epoch": 1.89, + "learning_rate": 1.0762142765345955e-05, + "loss": 0.508, + "step": 2831 + }, + { + "epoch": 1.89, + "learning_rate": 1.075686027210845e-05, + "loss": 0.525, + "step": 2832 + }, + { + "epoch": 1.89, + "learning_rate": 1.0751577566445732e-05, + "loss": 0.497, + "step": 2833 + }, + { + "epoch": 1.89, + "learning_rate": 1.0746294649840481e-05, + "loss": 0.4831, + "step": 2834 + }, + { + "epoch": 1.89, + "learning_rate": 1.0741011523775433e-05, + "loss": 0.4795, + "step": 2835 + }, + { + "epoch": 1.89, + "learning_rate": 1.0735728189733386e-05, + "loss": 0.4814, + "step": 2836 + }, + { + "epoch": 1.89, + "learning_rate": 1.0730444649197191e-05, + "loss": 0.497, + "step": 2837 + }, + { + "epoch": 1.89, + "learning_rate": 1.0725160903649765e-05, + "loss": 0.4884, + "step": 2838 + }, + { + "epoch": 1.89, + "learning_rate": 1.0719876954574071e-05, + "loss": 0.4925, + "step": 2839 + }, + { + "epoch": 1.89, + "learning_rate": 1.0714592803453138e-05, + "loss": 0.5302, + "step": 2840 + }, + { + "epoch": 1.9, + "learning_rate": 1.0709308451770053e-05, + "loss": 0.4754, + "step": 2841 + }, + { + "epoch": 1.9, + "learning_rate": 1.070402390100795e-05, + "loss": 0.4796, + "step": 2842 + }, + { + "epoch": 1.9, + "learning_rate": 1.0698739152650031e-05, + "loss": 0.4843, + "step": 2843 + }, + { + "epoch": 1.9, + "learning_rate": 1.0693454208179544e-05, + "loss": 0.4846, + "step": 2844 + }, + { + "epoch": 1.9, + "learning_rate": 1.0688169069079793e-05, + "loss": 0.4903, + "step": 2845 + }, + { + "epoch": 1.9, + "learning_rate": 1.068288373683414e-05, + "loss": 0.5108, + "step": 2846 + }, + { + "epoch": 1.9, + "learning_rate": 1.0677598212926001e-05, + "loss": 0.5432, + "step": 2847 + }, + { + "epoch": 1.9, + "learning_rate": 1.0672312498838844e-05, + "loss": 0.4934, + "step": 2848 + }, + { + "epoch": 1.9, + "learning_rate": 1.0667026596056186e-05, + "loss": 0.5237, + "step": 2849 + }, + { + "epoch": 1.9, + "learning_rate": 1.0661740506061616e-05, + "loss": 0.5274, + "step": 2850 + }, + { + "epoch": 1.9, + "learning_rate": 1.065645423033875e-05, + "loss": 0.5007, + "step": 2851 + }, + { + "epoch": 1.9, + "learning_rate": 1.0651167770371267e-05, + "loss": 0.5235, + "step": 2852 + }, + { + "epoch": 1.9, + "learning_rate": 1.0645881127642907e-05, + "loss": 0.4926, + "step": 2853 + }, + { + "epoch": 1.9, + "learning_rate": 1.0640594303637444e-05, + "loss": 0.5101, + "step": 2854 + }, + { + "epoch": 1.9, + "learning_rate": 1.0635307299838715e-05, + "loss": 0.4413, + "step": 2855 + }, + { + "epoch": 1.91, + "learning_rate": 1.0630020117730606e-05, + "loss": 0.4903, + "step": 2856 + }, + { + "epoch": 1.91, + "learning_rate": 1.062473275879705e-05, + "loss": 0.4597, + "step": 2857 + }, + { + "epoch": 1.91, + "learning_rate": 1.0619445224522027e-05, + "loss": 0.528, + "step": 2858 + }, + { + "epoch": 1.91, + "learning_rate": 1.0614157516389579e-05, + "loss": 0.4897, + "step": 2859 + }, + { + "epoch": 1.91, + "learning_rate": 1.0608869635883776e-05, + "loss": 0.4912, + "step": 2860 + }, + { + "epoch": 1.91, + "learning_rate": 1.0603581584488755e-05, + "loss": 0.4837, + "step": 2861 + }, + { + "epoch": 1.91, + "learning_rate": 1.0598293363688694e-05, + "loss": 0.5153, + "step": 2862 + }, + { + "epoch": 1.91, + "learning_rate": 1.0593004974967817e-05, + "loss": 0.4801, + "step": 2863 + }, + { + "epoch": 1.91, + "learning_rate": 1.05877164198104e-05, + "loss": 0.508, + "step": 2864 + }, + { + "epoch": 1.91, + "learning_rate": 1.0582427699700759e-05, + "loss": 0.5199, + "step": 2865 + }, + { + "epoch": 1.91, + "learning_rate": 1.057713881612326e-05, + "loss": 0.4843, + "step": 2866 + }, + { + "epoch": 1.91, + "learning_rate": 1.0571849770562316e-05, + "loss": 0.4659, + "step": 2867 + }, + { + "epoch": 1.91, + "learning_rate": 1.0566560564502384e-05, + "loss": 0.5031, + "step": 2868 + }, + { + "epoch": 1.91, + "learning_rate": 1.0561271199427965e-05, + "loss": 0.4719, + "step": 2869 + }, + { + "epoch": 1.91, + "learning_rate": 1.0555981676823606e-05, + "loss": 0.5044, + "step": 2870 + }, + { + "epoch": 1.92, + "learning_rate": 1.0550691998173897e-05, + "loss": 0.4975, + "step": 2871 + }, + { + "epoch": 1.92, + "learning_rate": 1.0545402164963476e-05, + "loss": 0.4609, + "step": 2872 + }, + { + "epoch": 1.92, + "learning_rate": 1.0540112178677022e-05, + "loss": 0.5131, + "step": 2873 + }, + { + "epoch": 1.92, + "learning_rate": 1.053482204079925e-05, + "loss": 0.4446, + "step": 2874 + }, + { + "epoch": 1.92, + "learning_rate": 1.0529531752814928e-05, + "loss": 0.4816, + "step": 2875 + }, + { + "epoch": 1.92, + "learning_rate": 1.052424131620886e-05, + "loss": 0.4872, + "step": 2876 + }, + { + "epoch": 1.92, + "learning_rate": 1.0518950732465895e-05, + "loss": 0.52, + "step": 2877 + }, + { + "epoch": 1.92, + "learning_rate": 1.0513660003070924e-05, + "loss": 0.5158, + "step": 2878 + }, + { + "epoch": 1.92, + "learning_rate": 1.0508369129508876e-05, + "loss": 0.4921, + "step": 2879 + }, + { + "epoch": 1.92, + "learning_rate": 1.0503078113264715e-05, + "loss": 0.4898, + "step": 2880 + }, + { + "epoch": 1.92, + "learning_rate": 1.0497786955823457e-05, + "loss": 0.4705, + "step": 2881 + }, + { + "epoch": 1.92, + "learning_rate": 1.0492495658670151e-05, + "loss": 0.4777, + "step": 2882 + }, + { + "epoch": 1.92, + "learning_rate": 1.0487204223289882e-05, + "loss": 0.502, + "step": 2883 + }, + { + "epoch": 1.92, + "learning_rate": 1.0481912651167784e-05, + "loss": 0.4947, + "step": 2884 + }, + { + "epoch": 1.92, + "learning_rate": 1.0476620943789021e-05, + "loss": 0.5034, + "step": 2885 + }, + { + "epoch": 1.93, + "learning_rate": 1.0471329102638799e-05, + "loss": 0.5059, + "step": 2886 + }, + { + "epoch": 1.93, + "learning_rate": 1.0466037129202356e-05, + "loss": 0.4792, + "step": 2887 + }, + { + "epoch": 1.93, + "learning_rate": 1.046074502496497e-05, + "loss": 0.4651, + "step": 2888 + }, + { + "epoch": 1.93, + "learning_rate": 1.0455452791411962e-05, + "loss": 0.521, + "step": 2889 + }, + { + "epoch": 1.93, + "learning_rate": 1.0450160430028679e-05, + "loss": 0.4928, + "step": 2890 + }, + { + "epoch": 1.93, + "learning_rate": 1.0444867942300512e-05, + "loss": 0.4638, + "step": 2891 + }, + { + "epoch": 1.93, + "learning_rate": 1.0439575329712883e-05, + "loss": 0.4608, + "step": 2892 + }, + { + "epoch": 1.93, + "learning_rate": 1.043428259375125e-05, + "loss": 0.4649, + "step": 2893 + }, + { + "epoch": 1.93, + "learning_rate": 1.0428989735901104e-05, + "loss": 0.4693, + "step": 2894 + }, + { + "epoch": 1.93, + "learning_rate": 1.0423696757647977e-05, + "loss": 0.5068, + "step": 2895 + }, + { + "epoch": 1.93, + "learning_rate": 1.0418403660477425e-05, + "loss": 0.4996, + "step": 2896 + }, + { + "epoch": 1.93, + "learning_rate": 1.0413110445875046e-05, + "loss": 0.4853, + "step": 2897 + }, + { + "epoch": 1.93, + "learning_rate": 1.0407817115326463e-05, + "loss": 0.4898, + "step": 2898 + }, + { + "epoch": 1.93, + "learning_rate": 1.0402523670317343e-05, + "loss": 0.5222, + "step": 2899 + }, + { + "epoch": 1.93, + "learning_rate": 1.039723011233337e-05, + "loss": 0.4907, + "step": 2900 + }, + { + "epoch": 1.94, + "learning_rate": 1.0391936442860271e-05, + "loss": 0.5157, + "step": 2901 + }, + { + "epoch": 1.94, + "learning_rate": 1.0386642663383802e-05, + "loss": 0.4984, + "step": 2902 + }, + { + "epoch": 1.94, + "learning_rate": 1.0381348775389745e-05, + "loss": 0.4725, + "step": 2903 + }, + { + "epoch": 1.94, + "learning_rate": 1.0376054780363917e-05, + "loss": 0.4468, + "step": 2904 + }, + { + "epoch": 1.94, + "learning_rate": 1.0370760679792173e-05, + "loss": 0.4809, + "step": 2905 + }, + { + "epoch": 1.94, + "learning_rate": 1.0365466475160377e-05, + "loss": 0.4874, + "step": 2906 + }, + { + "epoch": 1.94, + "learning_rate": 1.0360172167954439e-05, + "loss": 0.4814, + "step": 2907 + }, + { + "epoch": 1.94, + "learning_rate": 1.0354877759660296e-05, + "loss": 0.4635, + "step": 2908 + }, + { + "epoch": 1.94, + "learning_rate": 1.0349583251763905e-05, + "loss": 0.5117, + "step": 2909 + }, + { + "epoch": 1.94, + "learning_rate": 1.0344288645751257e-05, + "loss": 0.4821, + "step": 2910 + }, + { + "epoch": 1.94, + "learning_rate": 1.033899394310837e-05, + "loss": 0.4777, + "step": 2911 + }, + { + "epoch": 1.94, + "learning_rate": 1.0333699145321294e-05, + "loss": 0.4786, + "step": 2912 + }, + { + "epoch": 1.94, + "learning_rate": 1.0328404253876096e-05, + "loss": 0.4784, + "step": 2913 + }, + { + "epoch": 1.94, + "learning_rate": 1.0323109270258873e-05, + "loss": 0.5113, + "step": 2914 + }, + { + "epoch": 1.94, + "learning_rate": 1.031781419595575e-05, + "loss": 0.4958, + "step": 2915 + }, + { + "epoch": 1.95, + "learning_rate": 1.0312519032452877e-05, + "loss": 0.4805, + "step": 2916 + }, + { + "epoch": 1.95, + "learning_rate": 1.0307223781236424e-05, + "loss": 0.4957, + "step": 2917 + }, + { + "epoch": 1.95, + "learning_rate": 1.0301928443792598e-05, + "loss": 0.4952, + "step": 2918 + }, + { + "epoch": 1.95, + "learning_rate": 1.0296633021607617e-05, + "loss": 0.4686, + "step": 2919 + }, + { + "epoch": 1.95, + "learning_rate": 1.0291337516167725e-05, + "loss": 0.4754, + "step": 2920 + }, + { + "epoch": 1.95, + "learning_rate": 1.0286041928959197e-05, + "loss": 0.4714, + "step": 2921 + }, + { + "epoch": 1.95, + "learning_rate": 1.0280746261468326e-05, + "loss": 0.4981, + "step": 2922 + }, + { + "epoch": 1.95, + "learning_rate": 1.0275450515181424e-05, + "loss": 0.5099, + "step": 2923 + }, + { + "epoch": 1.95, + "learning_rate": 1.0270154691584833e-05, + "loss": 0.5042, + "step": 2924 + }, + { + "epoch": 1.95, + "learning_rate": 1.0264858792164908e-05, + "loss": 0.5074, + "step": 2925 + }, + { + "epoch": 1.95, + "learning_rate": 1.0259562818408033e-05, + "loss": 0.4982, + "step": 2926 + }, + { + "epoch": 1.95, + "learning_rate": 1.0254266771800609e-05, + "loss": 0.5062, + "step": 2927 + }, + { + "epoch": 1.95, + "learning_rate": 1.0248970653829063e-05, + "loss": 0.5191, + "step": 2928 + }, + { + "epoch": 1.95, + "learning_rate": 1.0243674465979825e-05, + "loss": 0.541, + "step": 2929 + }, + { + "epoch": 1.95, + "learning_rate": 1.0238378209739366e-05, + "loss": 0.5299, + "step": 2930 + }, + { + "epoch": 1.96, + "learning_rate": 1.0233081886594165e-05, + "loss": 0.4885, + "step": 2931 + }, + { + "epoch": 1.96, + "learning_rate": 1.0227785498030722e-05, + "loss": 0.484, + "step": 2932 + }, + { + "epoch": 1.96, + "learning_rate": 1.0222489045535553e-05, + "loss": 0.4875, + "step": 2933 + }, + { + "epoch": 1.96, + "learning_rate": 1.0217192530595196e-05, + "loss": 0.5089, + "step": 2934 + }, + { + "epoch": 1.96, + "learning_rate": 1.0211895954696204e-05, + "loss": 0.4779, + "step": 2935 + }, + { + "epoch": 1.96, + "learning_rate": 1.0206599319325148e-05, + "loss": 0.4459, + "step": 2936 + }, + { + "epoch": 1.96, + "learning_rate": 1.0201302625968616e-05, + "loss": 0.5009, + "step": 2937 + }, + { + "epoch": 1.96, + "learning_rate": 1.0196005876113209e-05, + "loss": 0.4415, + "step": 2938 + }, + { + "epoch": 1.96, + "learning_rate": 1.0190709071245547e-05, + "loss": 0.5037, + "step": 2939 + }, + { + "epoch": 1.96, + "learning_rate": 1.0185412212852268e-05, + "loss": 0.4729, + "step": 2940 + }, + { + "epoch": 1.96, + "learning_rate": 1.0180115302420019e-05, + "loss": 0.4909, + "step": 2941 + }, + { + "epoch": 1.96, + "learning_rate": 1.0174818341435466e-05, + "loss": 0.51, + "step": 2942 + }, + { + "epoch": 1.96, + "learning_rate": 1.0169521331385287e-05, + "loss": 0.5205, + "step": 2943 + }, + { + "epoch": 1.96, + "learning_rate": 1.0164224273756172e-05, + "loss": 0.4777, + "step": 2944 + }, + { + "epoch": 1.97, + "learning_rate": 1.0158927170034831e-05, + "loss": 0.4962, + "step": 2945 + }, + { + "epoch": 1.97, + "learning_rate": 1.0153630021707982e-05, + "loss": 0.5115, + "step": 2946 + }, + { + "epoch": 1.97, + "learning_rate": 1.0148332830262352e-05, + "loss": 0.4814, + "step": 2947 + }, + { + "epoch": 1.97, + "learning_rate": 1.0143035597184691e-05, + "loss": 0.4373, + "step": 2948 + }, + { + "epoch": 1.97, + "learning_rate": 1.013773832396175e-05, + "loss": 0.491, + "step": 2949 + }, + { + "epoch": 1.97, + "learning_rate": 1.0132441012080296e-05, + "loss": 0.5256, + "step": 2950 + }, + { + "epoch": 1.97, + "learning_rate": 1.0127143663027106e-05, + "loss": 0.5094, + "step": 2951 + }, + { + "epoch": 1.97, + "learning_rate": 1.012184627828897e-05, + "loss": 0.5509, + "step": 2952 + }, + { + "epoch": 1.97, + "learning_rate": 1.0116548859352682e-05, + "loss": 0.4828, + "step": 2953 + }, + { + "epoch": 1.97, + "learning_rate": 1.0111251407705052e-05, + "loss": 0.502, + "step": 2954 + }, + { + "epoch": 1.97, + "learning_rate": 1.0105953924832894e-05, + "loss": 0.5207, + "step": 2955 + }, + { + "epoch": 1.97, + "learning_rate": 1.0100656412223035e-05, + "loss": 0.518, + "step": 2956 + }, + { + "epoch": 1.97, + "learning_rate": 1.009535887136231e-05, + "loss": 0.4915, + "step": 2957 + }, + { + "epoch": 1.97, + "learning_rate": 1.0090061303737555e-05, + "loss": 0.4972, + "step": 2958 + }, + { + "epoch": 1.97, + "learning_rate": 1.0084763710835624e-05, + "loss": 0.5316, + "step": 2959 + }, + { + "epoch": 1.98, + "learning_rate": 1.0079466094143373e-05, + "loss": 0.4677, + "step": 2960 + }, + { + "epoch": 1.98, + "learning_rate": 1.0074168455147662e-05, + "loss": 0.5158, + "step": 2961 + }, + { + "epoch": 1.98, + "learning_rate": 1.006887079533536e-05, + "loss": 0.5006, + "step": 2962 + }, + { + "epoch": 1.98, + "learning_rate": 1.0063573116193346e-05, + "loss": 0.4925, + "step": 2963 + }, + { + "epoch": 1.98, + "learning_rate": 1.0058275419208496e-05, + "loss": 0.5025, + "step": 2964 + }, + { + "epoch": 1.98, + "learning_rate": 1.0052977705867697e-05, + "loss": 0.5037, + "step": 2965 + }, + { + "epoch": 1.98, + "learning_rate": 1.0047679977657836e-05, + "loss": 0.5, + "step": 2966 + }, + { + "epoch": 1.98, + "learning_rate": 1.0042382236065814e-05, + "loss": 0.5016, + "step": 2967 + }, + { + "epoch": 1.98, + "learning_rate": 1.0037084482578523e-05, + "loss": 0.4967, + "step": 2968 + }, + { + "epoch": 1.98, + "learning_rate": 1.0031786718682866e-05, + "loss": 0.457, + "step": 2969 + }, + { + "epoch": 1.98, + "learning_rate": 1.0026488945865744e-05, + "loss": 0.4925, + "step": 2970 + }, + { + "epoch": 1.98, + "learning_rate": 1.002119116561407e-05, + "loss": 0.4812, + "step": 2971 + }, + { + "epoch": 1.98, + "learning_rate": 1.0015893379414744e-05, + "loss": 0.4598, + "step": 2972 + }, + { + "epoch": 1.98, + "learning_rate": 1.0010595588754683e-05, + "loss": 0.506, + "step": 2973 + }, + { + "epoch": 1.98, + "learning_rate": 1.00052977951208e-05, + "loss": 0.4896, + "step": 2974 + }, + { + "epoch": 1.99, + "learning_rate": 1e-05, + "loss": 0.5029, + "step": 2975 + }, + { + "epoch": 1.99, + "learning_rate": 9.994702204879203e-06, + "loss": 0.4762, + "step": 2976 + }, + { + "epoch": 1.99, + "learning_rate": 9.989404411245316e-06, + "loss": 0.4894, + "step": 2977 + }, + { + "epoch": 1.99, + "learning_rate": 9.984106620585258e-06, + "loss": 0.5094, + "step": 2978 + }, + { + "epoch": 1.99, + "learning_rate": 9.978808834385936e-06, + "loss": 0.4987, + "step": 2979 + }, + { + "epoch": 1.99, + "learning_rate": 9.973511054134259e-06, + "loss": 0.5028, + "step": 2980 + }, + { + "epoch": 1.99, + "learning_rate": 9.96821328131714e-06, + "loss": 0.4956, + "step": 2981 + }, + { + "epoch": 1.99, + "learning_rate": 9.96291551742148e-06, + "loss": 0.5067, + "step": 2982 + }, + { + "epoch": 1.99, + "learning_rate": 9.957617763934188e-06, + "loss": 0.5024, + "step": 2983 + }, + { + "epoch": 1.99, + "learning_rate": 9.952320022342165e-06, + "loss": 0.5173, + "step": 2984 + }, + { + "epoch": 1.99, + "learning_rate": 9.947022294132306e-06, + "loss": 0.4876, + "step": 2985 + }, + { + "epoch": 1.99, + "learning_rate": 9.941724580791507e-06, + "loss": 0.4784, + "step": 2986 + }, + { + "epoch": 1.99, + "learning_rate": 9.936426883806657e-06, + "loss": 0.4727, + "step": 2987 + }, + { + "epoch": 1.99, + "learning_rate": 9.93112920466464e-06, + "loss": 0.48, + "step": 2988 + }, + { + "epoch": 1.99, + "learning_rate": 9.92583154485234e-06, + "loss": 0.4937, + "step": 2989 + }, + { + "epoch": 2.0, + "learning_rate": 9.920533905856634e-06, + "loss": 0.522, + "step": 2990 + }, + { + "epoch": 2.0, + "learning_rate": 9.915236289164381e-06, + "loss": 0.4809, + "step": 2991 + }, + { + "epoch": 2.0, + "learning_rate": 9.909938696262447e-06, + "loss": 0.4693, + "step": 2992 + }, + { + "epoch": 2.0, + "learning_rate": 9.904641128637693e-06, + "loss": 0.4934, + "step": 2993 + }, + { + "epoch": 2.0, + "learning_rate": 9.899343587776966e-06, + "loss": 0.5, + "step": 2994 + }, + { + "epoch": 2.0, + "learning_rate": 9.894046075167106e-06, + "loss": 0.4683, + "step": 2995 + }, + { + "epoch": 2.0, + "learning_rate": 9.888748592294953e-06, + "loss": 0.4824, + "step": 2996 + }, + { + "epoch": 2.0, + "learning_rate": 9.883451140647323e-06, + "loss": 0.49, + "step": 2997 + }, + { + "epoch": 2.0, + "learning_rate": 9.878153721711034e-06, + "loss": 0.5117, + "step": 2998 + }, + { + "epoch": 2.0, + "learning_rate": 9.872856336972896e-06, + "loss": 0.4748, + "step": 2999 + }, + { + "epoch": 2.0, + "learning_rate": 9.867558987919704e-06, + "loss": 0.5105, + "step": 3000 + }, + { + "epoch": 2.0, + "learning_rate": 9.862261676038254e-06, + "loss": 0.4925, + "step": 3001 + }, + { + "epoch": 2.0, + "learning_rate": 9.856964402815312e-06, + "loss": 0.5202, + "step": 3002 + }, + { + "epoch": 2.0, + "learning_rate": 9.851667169737651e-06, + "loss": 0.4646, + "step": 3003 + }, + { + "epoch": 2.0, + "learning_rate": 9.846369978292022e-06, + "loss": 0.4966, + "step": 3004 + }, + { + "epoch": 2.01, + "learning_rate": 9.841072829965172e-06, + "loss": 0.4671, + "step": 3005 + }, + { + "epoch": 2.01, + "learning_rate": 9.83577572624383e-06, + "loss": 0.5169, + "step": 3006 + }, + { + "epoch": 2.01, + "learning_rate": 9.830478668614718e-06, + "loss": 0.4912, + "step": 3007 + }, + { + "epoch": 2.01, + "learning_rate": 9.825181658564539e-06, + "loss": 0.4678, + "step": 3008 + }, + { + "epoch": 2.01, + "learning_rate": 9.819884697579984e-06, + "loss": 0.5064, + "step": 3009 + }, + { + "epoch": 2.01, + "learning_rate": 9.814587787147735e-06, + "loss": 0.48, + "step": 3010 + }, + { + "epoch": 2.01, + "learning_rate": 9.809290928754454e-06, + "loss": 0.501, + "step": 3011 + }, + { + "epoch": 2.01, + "learning_rate": 9.803994123886793e-06, + "loss": 0.4783, + "step": 3012 + }, + { + "epoch": 2.01, + "learning_rate": 9.798697374031388e-06, + "loss": 0.4799, + "step": 3013 + }, + { + "epoch": 2.01, + "learning_rate": 9.793400680674853e-06, + "loss": 0.4619, + "step": 3014 + }, + { + "epoch": 2.01, + "learning_rate": 9.788104045303797e-06, + "loss": 0.4934, + "step": 3015 + }, + { + "epoch": 2.01, + "learning_rate": 9.782807469404805e-06, + "loss": 0.5111, + "step": 3016 + }, + { + "epoch": 2.01, + "learning_rate": 9.777510954464448e-06, + "loss": 0.5035, + "step": 3017 + }, + { + "epoch": 2.01, + "learning_rate": 9.77221450196928e-06, + "loss": 0.5074, + "step": 3018 + }, + { + "epoch": 2.01, + "learning_rate": 9.766918113405838e-06, + "loss": 0.4702, + "step": 3019 + }, + { + "epoch": 2.02, + "learning_rate": 9.761621790260636e-06, + "loss": 0.456, + "step": 3020 + }, + { + "epoch": 2.02, + "learning_rate": 9.756325534020177e-06, + "loss": 0.469, + "step": 3021 + }, + { + "epoch": 2.02, + "learning_rate": 9.751029346170942e-06, + "loss": 0.4753, + "step": 3022 + }, + { + "epoch": 2.02, + "learning_rate": 9.74573322819939e-06, + "loss": 0.4956, + "step": 3023 + }, + { + "epoch": 2.0, + "learning_rate": 9.740437181591967e-06, + "loss": 0.437, + "step": 3024 + }, + { + "epoch": 2.0, + "learning_rate": 9.735141207835095e-06, + "loss": 0.4146, + "step": 3025 + }, + { + "epoch": 2.0, + "learning_rate": 9.72984530841517e-06, + "loss": 0.4427, + "step": 3026 + }, + { + "epoch": 2.0, + "learning_rate": 9.724549484818578e-06, + "loss": 0.4721, + "step": 3027 + }, + { + "epoch": 2.0, + "learning_rate": 9.719253738531676e-06, + "loss": 0.439, + "step": 3028 + }, + { + "epoch": 2.0, + "learning_rate": 9.713958071040803e-06, + "loss": 0.4536, + "step": 3029 + }, + { + "epoch": 2.0, + "learning_rate": 9.708662483832279e-06, + "loss": 0.4351, + "step": 3030 + }, + { + "epoch": 2.01, + "learning_rate": 9.703366978392388e-06, + "loss": 0.4254, + "step": 3031 + }, + { + "epoch": 2.01, + "learning_rate": 9.698071556207407e-06, + "loss": 0.4023, + "step": 3032 + }, + { + "epoch": 2.01, + "learning_rate": 9.69277621876358e-06, + "loss": 0.4288, + "step": 3033 + }, + { + "epoch": 2.01, + "learning_rate": 9.687480967547127e-06, + "loss": 0.4396, + "step": 3034 + }, + { + "epoch": 2.01, + "learning_rate": 9.682185804044252e-06, + "loss": 0.4131, + "step": 3035 + }, + { + "epoch": 2.01, + "learning_rate": 9.676890729741134e-06, + "loss": 0.4208, + "step": 3036 + }, + { + "epoch": 2.01, + "learning_rate": 9.67159574612391e-06, + "loss": 0.4434, + "step": 3037 + }, + { + "epoch": 2.01, + "learning_rate": 9.66630085467871e-06, + "loss": 0.4389, + "step": 3038 + }, + { + "epoch": 2.01, + "learning_rate": 9.661006056891631e-06, + "loss": 0.4429, + "step": 3039 + }, + { + "epoch": 2.01, + "learning_rate": 9.655711354248747e-06, + "loss": 0.4069, + "step": 3040 + }, + { + "epoch": 2.01, + "learning_rate": 9.650416748236099e-06, + "loss": 0.4562, + "step": 3041 + }, + { + "epoch": 2.01, + "learning_rate": 9.645122240339709e-06, + "loss": 0.4161, + "step": 3042 + }, + { + "epoch": 2.01, + "learning_rate": 9.639827832045564e-06, + "loss": 0.4611, + "step": 3043 + }, + { + "epoch": 2.01, + "learning_rate": 9.634533524839626e-06, + "loss": 0.4519, + "step": 3044 + }, + { + "epoch": 2.01, + "learning_rate": 9.62923932020783e-06, + "loss": 0.4385, + "step": 3045 + }, + { + "epoch": 2.02, + "learning_rate": 9.623945219636081e-06, + "loss": 0.4605, + "step": 3046 + }, + { + "epoch": 2.02, + "learning_rate": 9.618651224610257e-06, + "loss": 0.4065, + "step": 3047 + }, + { + "epoch": 2.02, + "learning_rate": 9.613357336616203e-06, + "loss": 0.4553, + "step": 3048 + }, + { + "epoch": 2.02, + "learning_rate": 9.608063557139732e-06, + "loss": 0.4096, + "step": 3049 + }, + { + "epoch": 2.02, + "learning_rate": 9.602769887666633e-06, + "loss": 0.4256, + "step": 3050 + }, + { + "epoch": 2.02, + "learning_rate": 9.59747632968266e-06, + "loss": 0.4498, + "step": 3051 + }, + { + "epoch": 2.02, + "learning_rate": 9.592182884673536e-06, + "loss": 0.4204, + "step": 3052 + }, + { + "epoch": 2.02, + "learning_rate": 9.586889554124957e-06, + "loss": 0.4516, + "step": 3053 + }, + { + "epoch": 2.02, + "learning_rate": 9.581596339522576e-06, + "loss": 0.4111, + "step": 3054 + }, + { + "epoch": 2.02, + "learning_rate": 9.576303242352025e-06, + "loss": 0.4505, + "step": 3055 + }, + { + "epoch": 2.02, + "learning_rate": 9.571010264098897e-06, + "loss": 0.4205, + "step": 3056 + }, + { + "epoch": 2.02, + "learning_rate": 9.565717406248752e-06, + "loss": 0.417, + "step": 3057 + }, + { + "epoch": 2.02, + "learning_rate": 9.560424670287119e-06, + "loss": 0.441, + "step": 3058 + }, + { + "epoch": 2.02, + "learning_rate": 9.555132057699493e-06, + "loss": 0.41, + "step": 3059 + }, + { + "epoch": 2.02, + "learning_rate": 9.549839569971323e-06, + "loss": 0.4201, + "step": 3060 + }, + { + "epoch": 2.03, + "learning_rate": 9.54454720858804e-06, + "loss": 0.403, + "step": 3061 + }, + { + "epoch": 2.03, + "learning_rate": 9.539254975035031e-06, + "loss": 0.3843, + "step": 3062 + }, + { + "epoch": 2.03, + "learning_rate": 9.533962870797646e-06, + "loss": 0.413, + "step": 3063 + }, + { + "epoch": 2.03, + "learning_rate": 9.528670897361203e-06, + "loss": 0.4564, + "step": 3064 + }, + { + "epoch": 2.03, + "learning_rate": 9.523379056210982e-06, + "loss": 0.4398, + "step": 3065 + }, + { + "epoch": 2.03, + "learning_rate": 9.518087348832219e-06, + "loss": 0.4408, + "step": 3066 + }, + { + "epoch": 2.03, + "learning_rate": 9.512795776710122e-06, + "loss": 0.4469, + "step": 3067 + }, + { + "epoch": 2.03, + "learning_rate": 9.507504341329852e-06, + "loss": 0.4652, + "step": 3068 + }, + { + "epoch": 2.03, + "learning_rate": 9.502213044176545e-06, + "loss": 0.4121, + "step": 3069 + }, + { + "epoch": 2.03, + "learning_rate": 9.496921886735287e-06, + "loss": 0.4466, + "step": 3070 + }, + { + "epoch": 2.03, + "learning_rate": 9.491630870491131e-06, + "loss": 0.4442, + "step": 3071 + }, + { + "epoch": 2.03, + "learning_rate": 9.486339996929079e-06, + "loss": 0.4356, + "step": 3072 + }, + { + "epoch": 2.03, + "learning_rate": 9.481049267534106e-06, + "loss": 0.442, + "step": 3073 + }, + { + "epoch": 2.03, + "learning_rate": 9.475758683791142e-06, + "loss": 0.4661, + "step": 3074 + }, + { + "epoch": 2.03, + "learning_rate": 9.470468247185076e-06, + "loss": 0.4441, + "step": 3075 + }, + { + "epoch": 2.04, + "learning_rate": 9.465177959200756e-06, + "loss": 0.4245, + "step": 3076 + }, + { + "epoch": 2.04, + "learning_rate": 9.459887821322983e-06, + "loss": 0.4399, + "step": 3077 + }, + { + "epoch": 2.04, + "learning_rate": 9.454597835036527e-06, + "loss": 0.4381, + "step": 3078 + }, + { + "epoch": 2.04, + "learning_rate": 9.449308001826104e-06, + "loss": 0.4601, + "step": 3079 + }, + { + "epoch": 2.04, + "learning_rate": 9.444018323176399e-06, + "loss": 0.4666, + "step": 3080 + }, + { + "epoch": 2.04, + "learning_rate": 9.43872880057204e-06, + "loss": 0.4482, + "step": 3081 + }, + { + "epoch": 2.04, + "learning_rate": 9.433439435497621e-06, + "loss": 0.4147, + "step": 3082 + }, + { + "epoch": 2.04, + "learning_rate": 9.428150229437689e-06, + "loss": 0.4019, + "step": 3083 + }, + { + "epoch": 2.04, + "learning_rate": 9.422861183876742e-06, + "loss": 0.4396, + "step": 3084 + }, + { + "epoch": 2.04, + "learning_rate": 9.417572300299244e-06, + "loss": 0.4445, + "step": 3085 + }, + { + "epoch": 2.04, + "learning_rate": 9.412283580189601e-06, + "loss": 0.4139, + "step": 3086 + }, + { + "epoch": 2.04, + "learning_rate": 9.406995025032183e-06, + "loss": 0.4168, + "step": 3087 + }, + { + "epoch": 2.04, + "learning_rate": 9.40170663631131e-06, + "loss": 0.4385, + "step": 3088 + }, + { + "epoch": 2.04, + "learning_rate": 9.396418415511248e-06, + "loss": 0.4235, + "step": 3089 + }, + { + "epoch": 2.04, + "learning_rate": 9.391130364116226e-06, + "loss": 0.4682, + "step": 3090 + }, + { + "epoch": 2.05, + "learning_rate": 9.385842483610426e-06, + "loss": 0.4274, + "step": 3091 + }, + { + "epoch": 2.05, + "learning_rate": 9.380554775477974e-06, + "loss": 0.4169, + "step": 3092 + }, + { + "epoch": 2.05, + "learning_rate": 9.375267241202952e-06, + "loss": 0.3905, + "step": 3093 + }, + { + "epoch": 2.05, + "learning_rate": 9.369979882269397e-06, + "loss": 0.4587, + "step": 3094 + }, + { + "epoch": 2.05, + "learning_rate": 9.364692700161287e-06, + "loss": 0.4684, + "step": 3095 + }, + { + "epoch": 2.05, + "learning_rate": 9.35940569636256e-06, + "loss": 0.4058, + "step": 3096 + }, + { + "epoch": 2.05, + "learning_rate": 9.354118872357096e-06, + "loss": 0.4391, + "step": 3097 + }, + { + "epoch": 2.05, + "learning_rate": 9.348832229628733e-06, + "loss": 0.4388, + "step": 3098 + }, + { + "epoch": 2.05, + "learning_rate": 9.343545769661252e-06, + "loss": 0.4554, + "step": 3099 + }, + { + "epoch": 2.05, + "learning_rate": 9.33825949393839e-06, + "loss": 0.412, + "step": 3100 + }, + { + "epoch": 2.05, + "learning_rate": 9.332973403943815e-06, + "loss": 0.448, + "step": 3101 + }, + { + "epoch": 2.05, + "learning_rate": 9.327687501161158e-06, + "loss": 0.4424, + "step": 3102 + }, + { + "epoch": 2.05, + "learning_rate": 9.322401787074e-06, + "loss": 0.4538, + "step": 3103 + }, + { + "epoch": 2.05, + "learning_rate": 9.317116263165862e-06, + "loss": 0.4263, + "step": 3104 + }, + { + "epoch": 2.06, + "learning_rate": 9.311830930920214e-06, + "loss": 0.4219, + "step": 3105 + }, + { + "epoch": 2.06, + "learning_rate": 9.306545791820461e-06, + "loss": 0.4664, + "step": 3106 + }, + { + "epoch": 2.06, + "learning_rate": 9.301260847349974e-06, + "loss": 0.4363, + "step": 3107 + }, + { + "epoch": 2.06, + "learning_rate": 9.295976098992053e-06, + "loss": 0.4299, + "step": 3108 + }, + { + "epoch": 2.06, + "learning_rate": 9.29069154822995e-06, + "loss": 0.4489, + "step": 3109 + }, + { + "epoch": 2.06, + "learning_rate": 9.285407196546862e-06, + "loss": 0.4411, + "step": 3110 + }, + { + "epoch": 2.06, + "learning_rate": 9.280123045425936e-06, + "loss": 0.4333, + "step": 3111 + }, + { + "epoch": 2.06, + "learning_rate": 9.274839096350241e-06, + "loss": 0.4369, + "step": 3112 + }, + { + "epoch": 2.06, + "learning_rate": 9.269555350802812e-06, + "loss": 0.435, + "step": 3113 + }, + { + "epoch": 2.06, + "learning_rate": 9.264271810266618e-06, + "loss": 0.4532, + "step": 3114 + }, + { + "epoch": 2.06, + "learning_rate": 9.25898847622457e-06, + "loss": 0.4396, + "step": 3115 + }, + { + "epoch": 2.06, + "learning_rate": 9.253705350159522e-06, + "loss": 0.4343, + "step": 3116 + }, + { + "epoch": 2.06, + "learning_rate": 9.248422433554273e-06, + "loss": 0.4988, + "step": 3117 + }, + { + "epoch": 2.06, + "learning_rate": 9.243139727891554e-06, + "loss": 0.4265, + "step": 3118 + }, + { + "epoch": 2.06, + "learning_rate": 9.237857234654048e-06, + "loss": 0.4469, + "step": 3119 + }, + { + "epoch": 2.07, + "learning_rate": 9.232574955324369e-06, + "loss": 0.4516, + "step": 3120 + }, + { + "epoch": 2.07, + "learning_rate": 9.227292891385078e-06, + "loss": 0.4299, + "step": 3121 + }, + { + "epoch": 2.07, + "learning_rate": 9.22201104431867e-06, + "loss": 0.4532, + "step": 3122 + }, + { + "epoch": 2.07, + "learning_rate": 9.216729415607588e-06, + "loss": 0.4405, + "step": 3123 + }, + { + "epoch": 2.07, + "learning_rate": 9.211448006734199e-06, + "loss": 0.4121, + "step": 3124 + }, + { + "epoch": 2.07, + "learning_rate": 9.206166819180822e-06, + "loss": 0.4103, + "step": 3125 + }, + { + "epoch": 2.07, + "learning_rate": 9.200885854429706e-06, + "loss": 0.4354, + "step": 3126 + }, + { + "epoch": 2.07, + "learning_rate": 9.195605113963042e-06, + "loss": 0.4422, + "step": 3127 + }, + { + "epoch": 2.07, + "learning_rate": 9.190324599262957e-06, + "loss": 0.4214, + "step": 3128 + }, + { + "epoch": 2.07, + "learning_rate": 9.185044311811511e-06, + "loss": 0.4335, + "step": 3129 + }, + { + "epoch": 2.07, + "learning_rate": 9.179764253090703e-06, + "loss": 0.4208, + "step": 3130 + }, + { + "epoch": 2.07, + "learning_rate": 9.174484424582471e-06, + "loss": 0.4444, + "step": 3131 + }, + { + "epoch": 2.07, + "learning_rate": 9.169204827768683e-06, + "loss": 0.4358, + "step": 3132 + }, + { + "epoch": 2.07, + "learning_rate": 9.163925464131143e-06, + "loss": 0.4224, + "step": 3133 + }, + { + "epoch": 2.07, + "learning_rate": 9.158646335151598e-06, + "loss": 0.4395, + "step": 3134 + }, + { + "epoch": 2.08, + "learning_rate": 9.153367442311712e-06, + "loss": 0.4383, + "step": 3135 + }, + { + "epoch": 2.08, + "learning_rate": 9.148088787093093e-06, + "loss": 0.4676, + "step": 3136 + }, + { + "epoch": 2.08, + "learning_rate": 9.142810370977289e-06, + "loss": 0.4364, + "step": 3137 + }, + { + "epoch": 2.08, + "learning_rate": 9.137532195445769e-06, + "loss": 0.4792, + "step": 3138 + }, + { + "epoch": 2.08, + "learning_rate": 9.132254261979943e-06, + "loss": 0.4508, + "step": 3139 + }, + { + "epoch": 2.08, + "learning_rate": 9.12697657206115e-06, + "loss": 0.45, + "step": 3140 + }, + { + "epoch": 2.08, + "learning_rate": 9.121699127170652e-06, + "loss": 0.4413, + "step": 3141 + }, + { + "epoch": 2.08, + "learning_rate": 9.116421928789655e-06, + "loss": 0.4299, + "step": 3142 + }, + { + "epoch": 2.08, + "learning_rate": 9.111144978399292e-06, + "loss": 0.4424, + "step": 3143 + }, + { + "epoch": 2.08, + "learning_rate": 9.105868277480622e-06, + "loss": 0.4621, + "step": 3144 + }, + { + "epoch": 2.08, + "learning_rate": 9.100591827514643e-06, + "loss": 0.445, + "step": 3145 + }, + { + "epoch": 2.08, + "learning_rate": 9.09531562998228e-06, + "loss": 0.4271, + "step": 3146 + }, + { + "epoch": 2.08, + "learning_rate": 9.09003968636437e-06, + "loss": 0.4619, + "step": 3147 + }, + { + "epoch": 2.08, + "learning_rate": 9.084763998141703e-06, + "loss": 0.4598, + "step": 3148 + }, + { + "epoch": 2.08, + "learning_rate": 9.079488566794984e-06, + "loss": 0.4174, + "step": 3149 + }, + { + "epoch": 2.09, + "learning_rate": 9.07421339380485e-06, + "loss": 0.4865, + "step": 3150 + }, + { + "epoch": 2.09, + "learning_rate": 9.068938480651868e-06, + "loss": 0.4655, + "step": 3151 + }, + { + "epoch": 2.09, + "learning_rate": 9.063663828816523e-06, + "loss": 0.4233, + "step": 3152 + }, + { + "epoch": 2.09, + "learning_rate": 9.058389439779233e-06, + "loss": 0.4591, + "step": 3153 + }, + { + "epoch": 2.09, + "learning_rate": 9.053115315020344e-06, + "loss": 0.4367, + "step": 3154 + }, + { + "epoch": 2.09, + "learning_rate": 9.047841456020125e-06, + "loss": 0.4348, + "step": 3155 + }, + { + "epoch": 2.09, + "learning_rate": 9.042567864258768e-06, + "loss": 0.4011, + "step": 3156 + }, + { + "epoch": 2.09, + "learning_rate": 9.0372945412164e-06, + "loss": 0.4606, + "step": 3157 + }, + { + "epoch": 2.09, + "learning_rate": 9.032021488373058e-06, + "loss": 0.3948, + "step": 3158 + }, + { + "epoch": 2.09, + "learning_rate": 9.026748707208712e-06, + "loss": 0.492, + "step": 3159 + }, + { + "epoch": 2.09, + "learning_rate": 9.021476199203255e-06, + "loss": 0.4212, + "step": 3160 + }, + { + "epoch": 2.09, + "learning_rate": 9.016203965836503e-06, + "loss": 0.4398, + "step": 3161 + }, + { + "epoch": 2.09, + "learning_rate": 9.010932008588194e-06, + "loss": 0.4554, + "step": 3162 + }, + { + "epoch": 2.09, + "learning_rate": 9.00566032893799e-06, + "loss": 0.4753, + "step": 3163 + }, + { + "epoch": 2.09, + "learning_rate": 9.000388928365473e-06, + "loss": 0.4076, + "step": 3164 + }, + { + "epoch": 2.1, + "learning_rate": 8.995117808350146e-06, + "loss": 0.4389, + "step": 3165 + }, + { + "epoch": 2.1, + "learning_rate": 8.989846970371438e-06, + "loss": 0.4405, + "step": 3166 + }, + { + "epoch": 2.1, + "learning_rate": 8.984576415908696e-06, + "loss": 0.4117, + "step": 3167 + }, + { + "epoch": 2.1, + "learning_rate": 8.979306146441185e-06, + "loss": 0.4344, + "step": 3168 + }, + { + "epoch": 2.1, + "learning_rate": 8.974036163448098e-06, + "loss": 0.4781, + "step": 3169 + }, + { + "epoch": 2.1, + "learning_rate": 8.968766468408532e-06, + "loss": 0.4679, + "step": 3170 + }, + { + "epoch": 2.1, + "learning_rate": 8.96349706280152e-06, + "loss": 0.4317, + "step": 3171 + }, + { + "epoch": 2.1, + "learning_rate": 8.958227948106005e-06, + "loss": 0.4041, + "step": 3172 + }, + { + "epoch": 2.1, + "learning_rate": 8.95295912580085e-06, + "loss": 0.4189, + "step": 3173 + }, + { + "epoch": 2.1, + "learning_rate": 8.947690597364836e-06, + "loss": 0.4271, + "step": 3174 + }, + { + "epoch": 2.1, + "learning_rate": 8.942422364276668e-06, + "loss": 0.4367, + "step": 3175 + }, + { + "epoch": 2.1, + "learning_rate": 8.937154428014951e-06, + "loss": 0.4442, + "step": 3176 + }, + { + "epoch": 2.1, + "learning_rate": 8.931886790058223e-06, + "loss": 0.4431, + "step": 3177 + }, + { + "epoch": 2.1, + "learning_rate": 8.92661945188493e-06, + "loss": 0.469, + "step": 3178 + }, + { + "epoch": 2.1, + "learning_rate": 8.921352414973441e-06, + "loss": 0.4279, + "step": 3179 + }, + { + "epoch": 2.11, + "learning_rate": 8.916085680802038e-06, + "loss": 0.4393, + "step": 3180 + }, + { + "epoch": 2.11, + "learning_rate": 8.910819250848907e-06, + "loss": 0.4693, + "step": 3181 + }, + { + "epoch": 2.11, + "learning_rate": 8.905553126592164e-06, + "loss": 0.4108, + "step": 3182 + }, + { + "epoch": 2.11, + "learning_rate": 8.900287309509831e-06, + "loss": 0.4587, + "step": 3183 + }, + { + "epoch": 2.11, + "learning_rate": 8.895021801079846e-06, + "loss": 0.4543, + "step": 3184 + }, + { + "epoch": 2.11, + "learning_rate": 8.889756602780059e-06, + "loss": 0.4242, + "step": 3185 + }, + { + "epoch": 2.11, + "learning_rate": 8.884491716088238e-06, + "loss": 0.4155, + "step": 3186 + }, + { + "epoch": 2.11, + "learning_rate": 8.879227142482055e-06, + "loss": 0.433, + "step": 3187 + }, + { + "epoch": 2.11, + "learning_rate": 8.8739628834391e-06, + "loss": 0.4293, + "step": 3188 + }, + { + "epoch": 2.11, + "learning_rate": 8.868698940436874e-06, + "loss": 0.4508, + "step": 3189 + }, + { + "epoch": 2.11, + "learning_rate": 8.863435314952787e-06, + "loss": 0.4317, + "step": 3190 + }, + { + "epoch": 2.11, + "learning_rate": 8.858172008464164e-06, + "loss": 0.4433, + "step": 3191 + }, + { + "epoch": 2.11, + "learning_rate": 8.852909022448239e-06, + "loss": 0.4192, + "step": 3192 + }, + { + "epoch": 2.11, + "learning_rate": 8.847646358382153e-06, + "loss": 0.4316, + "step": 3193 + }, + { + "epoch": 2.11, + "learning_rate": 8.842384017742956e-06, + "loss": 0.4411, + "step": 3194 + }, + { + "epoch": 2.12, + "learning_rate": 8.837122002007614e-06, + "loss": 0.412, + "step": 3195 + }, + { + "epoch": 2.12, + "learning_rate": 8.831860312652995e-06, + "loss": 0.4164, + "step": 3196 + }, + { + "epoch": 2.12, + "learning_rate": 8.82659895115588e-06, + "loss": 0.4399, + "step": 3197 + }, + { + "epoch": 2.12, + "learning_rate": 8.821337918992961e-06, + "loss": 0.4652, + "step": 3198 + }, + { + "epoch": 2.12, + "learning_rate": 8.816077217640822e-06, + "loss": 0.4325, + "step": 3199 + }, + { + "epoch": 2.12, + "learning_rate": 8.810816848575971e-06, + "loss": 0.4143, + "step": 3200 + }, + { + "epoch": 2.12, + "learning_rate": 8.805556813274817e-06, + "loss": 0.4277, + "step": 3201 + }, + { + "epoch": 2.12, + "learning_rate": 8.800297113213673e-06, + "loss": 0.4151, + "step": 3202 + }, + { + "epoch": 2.12, + "learning_rate": 8.795037749868764e-06, + "loss": 0.4328, + "step": 3203 + }, + { + "epoch": 2.12, + "learning_rate": 8.789778724716209e-06, + "loss": 0.4202, + "step": 3204 + }, + { + "epoch": 2.12, + "learning_rate": 8.784520039232044e-06, + "loss": 0.4346, + "step": 3205 + }, + { + "epoch": 2.12, + "learning_rate": 8.779261694892205e-06, + "loss": 0.4752, + "step": 3206 + }, + { + "epoch": 2.12, + "learning_rate": 8.77400369317253e-06, + "loss": 0.4587, + "step": 3207 + }, + { + "epoch": 2.12, + "learning_rate": 8.768746035548767e-06, + "loss": 0.4165, + "step": 3208 + }, + { + "epoch": 2.13, + "learning_rate": 8.763488723496565e-06, + "loss": 0.4291, + "step": 3209 + }, + { + "epoch": 2.13, + "learning_rate": 8.758231758491467e-06, + "loss": 0.4129, + "step": 3210 + }, + { + "epoch": 2.13, + "learning_rate": 8.752975142008928e-06, + "loss": 0.4158, + "step": 3211 + }, + { + "epoch": 2.13, + "learning_rate": 8.747718875524307e-06, + "loss": 0.4357, + "step": 3212 + }, + { + "epoch": 2.13, + "learning_rate": 8.74246296051286e-06, + "loss": 0.4941, + "step": 3213 + }, + { + "epoch": 2.13, + "learning_rate": 8.737207398449746e-06, + "loss": 0.4942, + "step": 3214 + }, + { + "epoch": 2.13, + "learning_rate": 8.731952190810029e-06, + "loss": 0.4383, + "step": 3215 + }, + { + "epoch": 2.13, + "learning_rate": 8.726697339068657e-06, + "loss": 0.4342, + "step": 3216 + }, + { + "epoch": 2.13, + "learning_rate": 8.721442844700499e-06, + "loss": 0.4577, + "step": 3217 + }, + { + "epoch": 2.13, + "learning_rate": 8.71618870918031e-06, + "loss": 0.4291, + "step": 3218 + }, + { + "epoch": 2.13, + "learning_rate": 8.71093493398275e-06, + "loss": 0.4078, + "step": 3219 + }, + { + "epoch": 2.13, + "learning_rate": 8.705681520582382e-06, + "loss": 0.4073, + "step": 3220 + }, + { + "epoch": 2.13, + "learning_rate": 8.700428470453663e-06, + "loss": 0.4608, + "step": 3221 + }, + { + "epoch": 2.13, + "learning_rate": 8.695175785070938e-06, + "loss": 0.413, + "step": 3222 + }, + { + "epoch": 2.13, + "learning_rate": 8.689923465908464e-06, + "loss": 0.4612, + "step": 3223 + }, + { + "epoch": 2.14, + "learning_rate": 8.684671514440391e-06, + "loss": 0.4355, + "step": 3224 + }, + { + "epoch": 2.14, + "learning_rate": 8.679419932140765e-06, + "loss": 0.4477, + "step": 3225 + }, + { + "epoch": 2.14, + "learning_rate": 8.67416872048353e-06, + "loss": 0.4294, + "step": 3226 + }, + { + "epoch": 2.14, + "learning_rate": 8.66891788094252e-06, + "loss": 0.4199, + "step": 3227 + }, + { + "epoch": 2.14, + "learning_rate": 8.66366741499147e-06, + "loss": 0.4324, + "step": 3228 + }, + { + "epoch": 2.14, + "learning_rate": 8.658417324104011e-06, + "loss": 0.421, + "step": 3229 + }, + { + "epoch": 2.14, + "learning_rate": 8.653167609753667e-06, + "loss": 0.4279, + "step": 3230 + }, + { + "epoch": 2.14, + "learning_rate": 8.647918273413856e-06, + "loss": 0.4307, + "step": 3231 + }, + { + "epoch": 2.14, + "learning_rate": 8.642669316557893e-06, + "loss": 0.4243, + "step": 3232 + }, + { + "epoch": 2.14, + "learning_rate": 8.637420740658976e-06, + "loss": 0.4207, + "step": 3233 + }, + { + "epoch": 2.14, + "learning_rate": 8.632172547190208e-06, + "loss": 0.4335, + "step": 3234 + }, + { + "epoch": 2.14, + "learning_rate": 8.62692473762458e-06, + "loss": 0.4559, + "step": 3235 + }, + { + "epoch": 2.14, + "learning_rate": 8.621677313434977e-06, + "loss": 0.469, + "step": 3236 + }, + { + "epoch": 2.14, + "learning_rate": 8.616430276094172e-06, + "loss": 0.4419, + "step": 3237 + }, + { + "epoch": 2.14, + "learning_rate": 8.611183627074835e-06, + "loss": 0.4237, + "step": 3238 + }, + { + "epoch": 2.15, + "learning_rate": 8.60593736784952e-06, + "loss": 0.4596, + "step": 3239 + }, + { + "epoch": 2.15, + "learning_rate": 8.600691499890677e-06, + "loss": 0.4473, + "step": 3240 + }, + { + "epoch": 2.15, + "learning_rate": 8.595446024670644e-06, + "loss": 0.4466, + "step": 3241 + }, + { + "epoch": 2.15, + "learning_rate": 8.590200943661652e-06, + "loss": 0.4342, + "step": 3242 + }, + { + "epoch": 2.15, + "learning_rate": 8.584956258335816e-06, + "loss": 0.4702, + "step": 3243 + }, + { + "epoch": 2.15, + "learning_rate": 8.579711970165148e-06, + "loss": 0.4397, + "step": 3244 + }, + { + "epoch": 2.15, + "learning_rate": 8.574468080621533e-06, + "loss": 0.4034, + "step": 3245 + }, + { + "epoch": 2.15, + "learning_rate": 8.56922459117676e-06, + "loss": 0.4269, + "step": 3246 + }, + { + "epoch": 2.15, + "learning_rate": 8.563981503302503e-06, + "loss": 0.4427, + "step": 3247 + }, + { + "epoch": 2.15, + "learning_rate": 8.558738818470317e-06, + "loss": 0.4177, + "step": 3248 + }, + { + "epoch": 2.15, + "learning_rate": 8.553496538151647e-06, + "loss": 0.4476, + "step": 3249 + }, + { + "epoch": 2.15, + "learning_rate": 8.54825466381783e-06, + "loss": 0.4454, + "step": 3250 + }, + { + "epoch": 2.15, + "learning_rate": 8.543013196940075e-06, + "loss": 0.4845, + "step": 3251 + }, + { + "epoch": 2.15, + "learning_rate": 8.53777213898949e-06, + "loss": 0.4478, + "step": 3252 + }, + { + "epoch": 2.15, + "learning_rate": 8.532531491437062e-06, + "loss": 0.4501, + "step": 3253 + }, + { + "epoch": 2.16, + "learning_rate": 8.527291255753666e-06, + "loss": 0.4996, + "step": 3254 + }, + { + "epoch": 2.16, + "learning_rate": 8.522051433410064e-06, + "loss": 0.4275, + "step": 3255 + }, + { + "epoch": 2.16, + "learning_rate": 8.516812025876891e-06, + "loss": 0.3928, + "step": 3256 + }, + { + "epoch": 2.16, + "learning_rate": 8.511573034624673e-06, + "loss": 0.4554, + "step": 3257 + }, + { + "epoch": 2.16, + "learning_rate": 8.50633446112382e-06, + "loss": 0.4638, + "step": 3258 + }, + { + "epoch": 2.16, + "learning_rate": 8.501096306844624e-06, + "loss": 0.421, + "step": 3259 + }, + { + "epoch": 2.16, + "learning_rate": 8.495858573257258e-06, + "loss": 0.4767, + "step": 3260 + }, + { + "epoch": 2.16, + "learning_rate": 8.490621261831781e-06, + "loss": 0.4308, + "step": 3261 + }, + { + "epoch": 2.16, + "learning_rate": 8.485384374038124e-06, + "loss": 0.4348, + "step": 3262 + }, + { + "epoch": 2.16, + "learning_rate": 8.480147911346108e-06, + "loss": 0.4315, + "step": 3263 + }, + { + "epoch": 2.16, + "learning_rate": 8.474911875225432e-06, + "loss": 0.4272, + "step": 3264 + }, + { + "epoch": 2.16, + "learning_rate": 8.469676267145674e-06, + "loss": 0.4492, + "step": 3265 + }, + { + "epoch": 2.16, + "learning_rate": 8.464441088576296e-06, + "loss": 0.4666, + "step": 3266 + }, + { + "epoch": 2.16, + "learning_rate": 8.459206340986637e-06, + "loss": 0.4391, + "step": 3267 + }, + { + "epoch": 2.16, + "learning_rate": 8.453972025845908e-06, + "loss": 0.4728, + "step": 3268 + }, + { + "epoch": 2.17, + "learning_rate": 8.448738144623212e-06, + "loss": 0.4391, + "step": 3269 + }, + { + "epoch": 2.17, + "learning_rate": 8.443504698787517e-06, + "loss": 0.4268, + "step": 3270 + }, + { + "epoch": 2.17, + "learning_rate": 8.438271689807682e-06, + "loss": 0.4355, + "step": 3271 + }, + { + "epoch": 2.17, + "learning_rate": 8.43303911915243e-06, + "loss": 0.4216, + "step": 3272 + }, + { + "epoch": 2.17, + "learning_rate": 8.427806988290374e-06, + "loss": 0.44, + "step": 3273 + }, + { + "epoch": 2.17, + "learning_rate": 8.422575298689993e-06, + "loss": 0.4194, + "step": 3274 + }, + { + "epoch": 2.17, + "learning_rate": 8.417344051819646e-06, + "loss": 0.4382, + "step": 3275 + }, + { + "epoch": 2.17, + "learning_rate": 8.412113249147571e-06, + "loss": 0.4844, + "step": 3276 + }, + { + "epoch": 2.17, + "learning_rate": 8.406882892141875e-06, + "loss": 0.4389, + "step": 3277 + }, + { + "epoch": 2.17, + "learning_rate": 8.401652982270548e-06, + "loss": 0.4001, + "step": 3278 + }, + { + "epoch": 2.17, + "learning_rate": 8.39642352100144e-06, + "loss": 0.4499, + "step": 3279 + }, + { + "epoch": 2.17, + "learning_rate": 8.391194509802294e-06, + "loss": 0.4405, + "step": 3280 + }, + { + "epoch": 2.17, + "learning_rate": 8.385965950140714e-06, + "loss": 0.4058, + "step": 3281 + }, + { + "epoch": 2.17, + "learning_rate": 8.380737843484181e-06, + "loss": 0.4037, + "step": 3282 + }, + { + "epoch": 2.17, + "learning_rate": 8.37551019130005e-06, + "loss": 0.4304, + "step": 3283 + }, + { + "epoch": 2.18, + "learning_rate": 8.37028299505555e-06, + "loss": 0.4666, + "step": 3284 + }, + { + "epoch": 2.18, + "learning_rate": 8.365056256217772e-06, + "loss": 0.4508, + "step": 3285 + }, + { + "epoch": 2.18, + "learning_rate": 8.359829976253687e-06, + "loss": 0.4296, + "step": 3286 + }, + { + "epoch": 2.18, + "learning_rate": 8.354604156630136e-06, + "loss": 0.4795, + "step": 3287 + }, + { + "epoch": 2.18, + "learning_rate": 8.349378798813835e-06, + "loss": 0.4067, + "step": 3288 + }, + { + "epoch": 2.18, + "learning_rate": 8.344153904271363e-06, + "loss": 0.438, + "step": 3289 + }, + { + "epoch": 2.18, + "learning_rate": 8.338929474469177e-06, + "loss": 0.4478, + "step": 3290 + }, + { + "epoch": 2.18, + "learning_rate": 8.33370551087359e-06, + "loss": 0.4555, + "step": 3291 + }, + { + "epoch": 2.18, + "learning_rate": 8.328482014950798e-06, + "loss": 0.4411, + "step": 3292 + }, + { + "epoch": 2.18, + "learning_rate": 8.32325898816686e-06, + "loss": 0.4222, + "step": 3293 + }, + { + "epoch": 2.18, + "learning_rate": 8.318036431987703e-06, + "loss": 0.4431, + "step": 3294 + }, + { + "epoch": 2.18, + "learning_rate": 8.312814347879121e-06, + "loss": 0.4481, + "step": 3295 + }, + { + "epoch": 2.18, + "learning_rate": 8.307592737306786e-06, + "loss": 0.4643, + "step": 3296 + }, + { + "epoch": 2.18, + "learning_rate": 8.302371601736218e-06, + "loss": 0.4421, + "step": 3297 + }, + { + "epoch": 2.19, + "learning_rate": 8.297150942632818e-06, + "loss": 0.4551, + "step": 3298 + }, + { + "epoch": 2.19, + "learning_rate": 8.29193076146185e-06, + "loss": 0.4231, + "step": 3299 + }, + { + "epoch": 2.19, + "learning_rate": 8.286711059688441e-06, + "loss": 0.4674, + "step": 3300 + }, + { + "epoch": 2.19, + "learning_rate": 8.28149183877759e-06, + "loss": 0.4272, + "step": 3301 + }, + { + "epoch": 2.19, + "learning_rate": 8.276273100194154e-06, + "loss": 0.4478, + "step": 3302 + }, + { + "epoch": 2.19, + "learning_rate": 8.271054845402855e-06, + "loss": 0.4773, + "step": 3303 + }, + { + "epoch": 2.19, + "learning_rate": 8.265837075868283e-06, + "loss": 0.4381, + "step": 3304 + }, + { + "epoch": 2.19, + "learning_rate": 8.260619793054894e-06, + "loss": 0.4767, + "step": 3305 + }, + { + "epoch": 2.19, + "learning_rate": 8.255402998427e-06, + "loss": 0.4362, + "step": 3306 + }, + { + "epoch": 2.19, + "learning_rate": 8.250186693448782e-06, + "loss": 0.4714, + "step": 3307 + }, + { + "epoch": 2.19, + "learning_rate": 8.244970879584277e-06, + "loss": 0.4361, + "step": 3308 + }, + { + "epoch": 2.19, + "learning_rate": 8.239755558297392e-06, + "loss": 0.4301, + "step": 3309 + }, + { + "epoch": 2.19, + "learning_rate": 8.234540731051892e-06, + "loss": 0.4509, + "step": 3310 + }, + { + "epoch": 2.19, + "learning_rate": 8.229326399311403e-06, + "loss": 0.4405, + "step": 3311 + }, + { + "epoch": 2.19, + "learning_rate": 8.224112564539413e-06, + "loss": 0.4602, + "step": 3312 + }, + { + "epoch": 2.2, + "learning_rate": 8.21889922819927e-06, + "loss": 0.464, + "step": 3313 + }, + { + "epoch": 2.2, + "learning_rate": 8.21368639175418e-06, + "loss": 0.4464, + "step": 3314 + }, + { + "epoch": 2.2, + "learning_rate": 8.208474056667212e-06, + "loss": 0.4818, + "step": 3315 + }, + { + "epoch": 2.2, + "learning_rate": 8.203262224401295e-06, + "loss": 0.4171, + "step": 3316 + }, + { + "epoch": 2.2, + "learning_rate": 8.198050896419214e-06, + "loss": 0.4621, + "step": 3317 + }, + { + "epoch": 2.2, + "learning_rate": 8.19284007418361e-06, + "loss": 0.4563, + "step": 3318 + }, + { + "epoch": 2.2, + "learning_rate": 8.187629759156994e-06, + "loss": 0.4325, + "step": 3319 + }, + { + "epoch": 2.2, + "learning_rate": 8.182419952801716e-06, + "loss": 0.4425, + "step": 3320 + }, + { + "epoch": 2.2, + "learning_rate": 8.177210656579996e-06, + "loss": 0.4053, + "step": 3321 + }, + { + "epoch": 2.2, + "learning_rate": 8.172001871953912e-06, + "loss": 0.4533, + "step": 3322 + }, + { + "epoch": 2.2, + "learning_rate": 8.166793600385391e-06, + "loss": 0.4385, + "step": 3323 + }, + { + "epoch": 2.2, + "learning_rate": 8.161585843336227e-06, + "loss": 0.448, + "step": 3324 + }, + { + "epoch": 2.2, + "learning_rate": 8.15637860226805e-06, + "loss": 0.3988, + "step": 3325 + }, + { + "epoch": 2.2, + "learning_rate": 8.151171878642365e-06, + "loss": 0.3908, + "step": 3326 + }, + { + "epoch": 2.2, + "learning_rate": 8.145965673920523e-06, + "loss": 0.4152, + "step": 3327 + }, + { + "epoch": 2.21, + "learning_rate": 8.14075998956373e-06, + "loss": 0.4291, + "step": 3328 + }, + { + "epoch": 2.21, + "learning_rate": 8.135554827033044e-06, + "loss": 0.4169, + "step": 3329 + }, + { + "epoch": 2.21, + "learning_rate": 8.130350187789387e-06, + "loss": 0.4492, + "step": 3330 + }, + { + "epoch": 2.21, + "learning_rate": 8.125146073293517e-06, + "loss": 0.4439, + "step": 3331 + }, + { + "epoch": 2.21, + "learning_rate": 8.119942485006058e-06, + "loss": 0.4429, + "step": 3332 + }, + { + "epoch": 2.21, + "learning_rate": 8.114739424387481e-06, + "loss": 0.4352, + "step": 3333 + }, + { + "epoch": 2.21, + "learning_rate": 8.10953689289811e-06, + "loss": 0.436, + "step": 3334 + }, + { + "epoch": 2.21, + "learning_rate": 8.104334891998124e-06, + "loss": 0.4581, + "step": 3335 + }, + { + "epoch": 2.21, + "learning_rate": 8.099133423147547e-06, + "loss": 0.458, + "step": 3336 + }, + { + "epoch": 2.21, + "learning_rate": 8.093932487806254e-06, + "loss": 0.4513, + "step": 3337 + }, + { + "epoch": 2.21, + "learning_rate": 8.088732087433975e-06, + "loss": 0.4382, + "step": 3338 + }, + { + "epoch": 2.21, + "learning_rate": 8.083532223490288e-06, + "loss": 0.3819, + "step": 3339 + }, + { + "epoch": 2.21, + "learning_rate": 8.078332897434617e-06, + "loss": 0.4486, + "step": 3340 + }, + { + "epoch": 2.21, + "learning_rate": 8.07313411072624e-06, + "loss": 0.4267, + "step": 3341 + }, + { + "epoch": 2.21, + "learning_rate": 8.067935864824283e-06, + "loss": 0.452, + "step": 3342 + }, + { + "epoch": 2.22, + "learning_rate": 8.062738161187716e-06, + "loss": 0.4281, + "step": 3343 + }, + { + "epoch": 2.22, + "learning_rate": 8.05754100127536e-06, + "loss": 0.4447, + "step": 3344 + }, + { + "epoch": 2.22, + "learning_rate": 8.052344386545882e-06, + "loss": 0.465, + "step": 3345 + }, + { + "epoch": 2.22, + "learning_rate": 8.0471483184578e-06, + "loss": 0.433, + "step": 3346 + }, + { + "epoch": 2.22, + "learning_rate": 8.041952798469473e-06, + "loss": 0.4692, + "step": 3347 + }, + { + "epoch": 2.22, + "learning_rate": 8.036757828039112e-06, + "loss": 0.4626, + "step": 3348 + }, + { + "epoch": 2.22, + "learning_rate": 8.031563408624767e-06, + "loss": 0.4259, + "step": 3349 + }, + { + "epoch": 2.22, + "learning_rate": 8.026369541684334e-06, + "loss": 0.4502, + "step": 3350 + }, + { + "epoch": 2.22, + "learning_rate": 8.021176228675563e-06, + "loss": 0.4452, + "step": 3351 + }, + { + "epoch": 2.22, + "learning_rate": 8.015983471056038e-06, + "loss": 0.4518, + "step": 3352 + }, + { + "epoch": 2.22, + "learning_rate": 8.010791270283197e-06, + "loss": 0.4352, + "step": 3353 + }, + { + "epoch": 2.22, + "learning_rate": 8.00559962781431e-06, + "loss": 0.4588, + "step": 3354 + }, + { + "epoch": 2.22, + "learning_rate": 8.000408545106492e-06, + "loss": 0.4438, + "step": 3355 + }, + { + "epoch": 2.22, + "learning_rate": 7.995218023616715e-06, + "loss": 0.4948, + "step": 3356 + }, + { + "epoch": 2.22, + "learning_rate": 7.990028064801781e-06, + "loss": 0.432, + "step": 3357 + }, + { + "epoch": 2.23, + "learning_rate": 7.984838670118335e-06, + "loss": 0.4285, + "step": 3358 + }, + { + "epoch": 2.23, + "learning_rate": 7.979649841022872e-06, + "loss": 0.4376, + "step": 3359 + }, + { + "epoch": 2.23, + "learning_rate": 7.97446157897171e-06, + "loss": 0.4321, + "step": 3360 + }, + { + "epoch": 2.23, + "learning_rate": 7.969273885421027e-06, + "loss": 0.3945, + "step": 3361 + }, + { + "epoch": 2.23, + "learning_rate": 7.964086761826832e-06, + "loss": 0.4434, + "step": 3362 + }, + { + "epoch": 2.23, + "learning_rate": 7.958900209644975e-06, + "loss": 0.4359, + "step": 3363 + }, + { + "epoch": 2.23, + "learning_rate": 7.953714230331152e-06, + "loss": 0.4034, + "step": 3364 + }, + { + "epoch": 2.23, + "learning_rate": 7.948528825340891e-06, + "loss": 0.4401, + "step": 3365 + }, + { + "epoch": 2.23, + "learning_rate": 7.943343996129555e-06, + "loss": 0.4259, + "step": 3366 + }, + { + "epoch": 2.23, + "learning_rate": 7.938159744152357e-06, + "loss": 0.4389, + "step": 3367 + }, + { + "epoch": 2.23, + "learning_rate": 7.932976070864338e-06, + "loss": 0.4086, + "step": 3368 + }, + { + "epoch": 2.23, + "learning_rate": 7.927792977720384e-06, + "loss": 0.4316, + "step": 3369 + }, + { + "epoch": 2.23, + "learning_rate": 7.922610466175213e-06, + "loss": 0.4205, + "step": 3370 + }, + { + "epoch": 2.23, + "learning_rate": 7.917428537683386e-06, + "loss": 0.4412, + "step": 3371 + }, + { + "epoch": 2.23, + "learning_rate": 7.912247193699288e-06, + "loss": 0.443, + "step": 3372 + }, + { + "epoch": 2.24, + "learning_rate": 7.907066435677154e-06, + "loss": 0.4321, + "step": 3373 + }, + { + "epoch": 2.24, + "learning_rate": 7.901886265071046e-06, + "loss": 0.4128, + "step": 3374 + }, + { + "epoch": 2.24, + "learning_rate": 7.896706683334863e-06, + "loss": 0.4476, + "step": 3375 + }, + { + "epoch": 2.24, + "learning_rate": 7.891527691922346e-06, + "loss": 0.4417, + "step": 3376 + }, + { + "epoch": 2.24, + "learning_rate": 7.886349292287052e-06, + "loss": 0.4693, + "step": 3377 + }, + { + "epoch": 2.24, + "learning_rate": 7.881171485882392e-06, + "loss": 0.4304, + "step": 3378 + }, + { + "epoch": 2.24, + "learning_rate": 7.8759942741616e-06, + "loss": 0.4458, + "step": 3379 + }, + { + "epoch": 2.24, + "learning_rate": 7.870817658577743e-06, + "loss": 0.4159, + "step": 3380 + }, + { + "epoch": 2.24, + "learning_rate": 7.865641640583725e-06, + "loss": 0.4161, + "step": 3381 + }, + { + "epoch": 2.24, + "learning_rate": 7.860466221632282e-06, + "loss": 0.4145, + "step": 3382 + }, + { + "epoch": 2.24, + "learning_rate": 7.855291403175976e-06, + "loss": 0.4316, + "step": 3383 + }, + { + "epoch": 2.24, + "learning_rate": 7.850117186667206e-06, + "loss": 0.4341, + "step": 3384 + }, + { + "epoch": 2.24, + "learning_rate": 7.844943573558202e-06, + "loss": 0.4535, + "step": 3385 + }, + { + "epoch": 2.24, + "learning_rate": 7.839770565301019e-06, + "loss": 0.4604, + "step": 3386 + }, + { + "epoch": 2.24, + "learning_rate": 7.83459816334755e-06, + "loss": 0.4484, + "step": 3387 + }, + { + "epoch": 2.25, + "learning_rate": 7.829426369149518e-06, + "loss": 0.498, + "step": 3388 + }, + { + "epoch": 2.25, + "learning_rate": 7.82425518415846e-06, + "loss": 0.4422, + "step": 3389 + }, + { + "epoch": 2.25, + "learning_rate": 7.819084609825762e-06, + "loss": 0.474, + "step": 3390 + }, + { + "epoch": 2.25, + "learning_rate": 7.81391464760263e-06, + "loss": 0.4473, + "step": 3391 + }, + { + "epoch": 2.25, + "learning_rate": 7.808745298940096e-06, + "loss": 0.459, + "step": 3392 + }, + { + "epoch": 2.25, + "learning_rate": 7.803576565289023e-06, + "loss": 0.405, + "step": 3393 + }, + { + "epoch": 2.25, + "learning_rate": 7.798408448100106e-06, + "loss": 0.4761, + "step": 3394 + }, + { + "epoch": 2.25, + "learning_rate": 7.793240948823852e-06, + "loss": 0.457, + "step": 3395 + }, + { + "epoch": 2.25, + "learning_rate": 7.788074068910609e-06, + "loss": 0.4435, + "step": 3396 + }, + { + "epoch": 2.25, + "learning_rate": 7.782907809810542e-06, + "loss": 0.4636, + "step": 3397 + }, + { + "epoch": 2.25, + "learning_rate": 7.777742172973655e-06, + "loss": 0.4407, + "step": 3398 + }, + { + "epoch": 2.25, + "learning_rate": 7.772577159849767e-06, + "loss": 0.4257, + "step": 3399 + }, + { + "epoch": 2.25, + "learning_rate": 7.767412771888515e-06, + "loss": 0.4465, + "step": 3400 + }, + { + "epoch": 2.25, + "learning_rate": 7.762249010539372e-06, + "loss": 0.4883, + "step": 3401 + }, + { + "epoch": 2.26, + "learning_rate": 7.757085877251638e-06, + "loss": 0.4491, + "step": 3402 + }, + { + "epoch": 2.26, + "learning_rate": 7.751923373474425e-06, + "loss": 0.3983, + "step": 3403 + }, + { + "epoch": 2.26, + "learning_rate": 7.746761500656676e-06, + "loss": 0.4486, + "step": 3404 + }, + { + "epoch": 2.26, + "learning_rate": 7.741600260247155e-06, + "loss": 0.4098, + "step": 3405 + }, + { + "epoch": 2.26, + "learning_rate": 7.73643965369445e-06, + "loss": 0.4879, + "step": 3406 + }, + { + "epoch": 2.26, + "learning_rate": 7.731279682446964e-06, + "loss": 0.4209, + "step": 3407 + }, + { + "epoch": 2.26, + "learning_rate": 7.726120347952932e-06, + "loss": 0.4568, + "step": 3408 + }, + { + "epoch": 2.26, + "learning_rate": 7.720961651660406e-06, + "loss": 0.4212, + "step": 3409 + }, + { + "epoch": 2.26, + "learning_rate": 7.715803595017257e-06, + "loss": 0.4247, + "step": 3410 + }, + { + "epoch": 2.26, + "learning_rate": 7.71064617947118e-06, + "loss": 0.4458, + "step": 3411 + }, + { + "epoch": 2.26, + "learning_rate": 7.705489406469684e-06, + "loss": 0.4574, + "step": 3412 + }, + { + "epoch": 2.26, + "learning_rate": 7.700333277460104e-06, + "loss": 0.4325, + "step": 3413 + }, + { + "epoch": 2.26, + "learning_rate": 7.695177793889593e-06, + "loss": 0.392, + "step": 3414 + }, + { + "epoch": 2.26, + "learning_rate": 7.69002295720512e-06, + "loss": 0.4819, + "step": 3415 + }, + { + "epoch": 2.26, + "learning_rate": 7.684868768853472e-06, + "loss": 0.4743, + "step": 3416 + }, + { + "epoch": 2.27, + "learning_rate": 7.679715230281265e-06, + "loss": 0.432, + "step": 3417 + }, + { + "epoch": 2.27, + "learning_rate": 7.674562342934914e-06, + "loss": 0.4601, + "step": 3418 + }, + { + "epoch": 2.27, + "learning_rate": 7.669410108260664e-06, + "loss": 0.4758, + "step": 3419 + }, + { + "epoch": 2.27, + "learning_rate": 7.664258527704576e-06, + "loss": 0.4441, + "step": 3420 + }, + { + "epoch": 2.27, + "learning_rate": 7.659107602712524e-06, + "loss": 0.4276, + "step": 3421 + }, + { + "epoch": 2.27, + "learning_rate": 7.6539573347302e-06, + "loss": 0.4361, + "step": 3422 + }, + { + "epoch": 2.27, + "learning_rate": 7.648807725203112e-06, + "loss": 0.441, + "step": 3423 + }, + { + "epoch": 2.27, + "learning_rate": 7.64365877557658e-06, + "loss": 0.4189, + "step": 3424 + }, + { + "epoch": 2.27, + "learning_rate": 7.638510487295738e-06, + "loss": 0.4356, + "step": 3425 + }, + { + "epoch": 2.27, + "learning_rate": 7.633362861805544e-06, + "loss": 0.4215, + "step": 3426 + }, + { + "epoch": 2.27, + "learning_rate": 7.628215900550758e-06, + "loss": 0.4181, + "step": 3427 + }, + { + "epoch": 2.27, + "learning_rate": 7.623069604975966e-06, + "loss": 0.4294, + "step": 3428 + }, + { + "epoch": 2.27, + "learning_rate": 7.617923976525549e-06, + "loss": 0.443, + "step": 3429 + }, + { + "epoch": 2.27, + "learning_rate": 7.61277901664372e-06, + "loss": 0.4298, + "step": 3430 + }, + { + "epoch": 2.27, + "learning_rate": 7.607634726774491e-06, + "loss": 0.4353, + "step": 3431 + }, + { + "epoch": 2.28, + "learning_rate": 7.602491108361695e-06, + "loss": 0.4528, + "step": 3432 + }, + { + "epoch": 2.28, + "learning_rate": 7.597348162848972e-06, + "loss": 0.4203, + "step": 3433 + }, + { + "epoch": 2.28, + "learning_rate": 7.592205891679777e-06, + "loss": 0.4689, + "step": 3434 + }, + { + "epoch": 2.28, + "learning_rate": 7.587064296297364e-06, + "loss": 0.4104, + "step": 3435 + }, + { + "epoch": 2.28, + "learning_rate": 7.5819233781448105e-06, + "loss": 0.4375, + "step": 3436 + }, + { + "epoch": 2.28, + "learning_rate": 7.576783138665e-06, + "loss": 0.4357, + "step": 3437 + }, + { + "epoch": 2.28, + "learning_rate": 7.571643579300622e-06, + "loss": 0.4485, + "step": 3438 + }, + { + "epoch": 2.28, + "learning_rate": 7.5665047014941805e-06, + "loss": 0.4608, + "step": 3439 + }, + { + "epoch": 2.28, + "learning_rate": 7.56136650668799e-06, + "loss": 0.4587, + "step": 3440 + }, + { + "epoch": 2.28, + "learning_rate": 7.55622899632416e-06, + "loss": 0.4229, + "step": 3441 + }, + { + "epoch": 2.28, + "learning_rate": 7.55109217184462e-06, + "loss": 0.4576, + "step": 3442 + }, + { + "epoch": 2.28, + "learning_rate": 7.545956034691104e-06, + "loss": 0.4087, + "step": 3443 + }, + { + "epoch": 2.28, + "learning_rate": 7.540820586305153e-06, + "loss": 0.4538, + "step": 3444 + }, + { + "epoch": 2.28, + "learning_rate": 7.535685828128117e-06, + "loss": 0.4152, + "step": 3445 + }, + { + "epoch": 2.28, + "learning_rate": 7.530551761601147e-06, + "loss": 0.4219, + "step": 3446 + }, + { + "epoch": 2.29, + "learning_rate": 7.525418388165202e-06, + "loss": 0.4365, + "step": 3447 + }, + { + "epoch": 2.29, + "learning_rate": 7.520285709261049e-06, + "loss": 0.4476, + "step": 3448 + }, + { + "epoch": 2.29, + "learning_rate": 7.5151537263292575e-06, + "loss": 0.4355, + "step": 3449 + }, + { + "epoch": 2.29, + "learning_rate": 7.510022440810203e-06, + "loss": 0.4305, + "step": 3450 + }, + { + "epoch": 2.29, + "learning_rate": 7.504891854144066e-06, + "loss": 0.436, + "step": 3451 + }, + { + "epoch": 2.29, + "learning_rate": 7.499761967770827e-06, + "loss": 0.4559, + "step": 3452 + }, + { + "epoch": 2.29, + "learning_rate": 7.494632783130271e-06, + "loss": 0.4461, + "step": 3453 + }, + { + "epoch": 2.29, + "learning_rate": 7.489504301661992e-06, + "loss": 0.4525, + "step": 3454 + }, + { + "epoch": 2.29, + "learning_rate": 7.48437652480538e-06, + "loss": 0.4435, + "step": 3455 + }, + { + "epoch": 2.29, + "learning_rate": 7.4792494539996285e-06, + "loss": 0.4356, + "step": 3456 + }, + { + "epoch": 2.29, + "learning_rate": 7.474123090683738e-06, + "loss": 0.431, + "step": 3457 + }, + { + "epoch": 2.29, + "learning_rate": 7.468997436296501e-06, + "loss": 0.4583, + "step": 3458 + }, + { + "epoch": 2.29, + "learning_rate": 7.4638724922765185e-06, + "loss": 0.3965, + "step": 3459 + }, + { + "epoch": 2.29, + "learning_rate": 7.458748260062187e-06, + "loss": 0.4467, + "step": 3460 + }, + { + "epoch": 2.29, + "learning_rate": 7.453624741091712e-06, + "loss": 0.4076, + "step": 3461 + }, + { + "epoch": 2.3, + "learning_rate": 7.448501936803087e-06, + "loss": 0.438, + "step": 3462 + }, + { + "epoch": 2.3, + "learning_rate": 7.443379848634118e-06, + "loss": 0.4097, + "step": 3463 + }, + { + "epoch": 2.3, + "learning_rate": 7.438258478022393e-06, + "loss": 0.4785, + "step": 3464 + }, + { + "epoch": 2.3, + "learning_rate": 7.433137826405314e-06, + "loss": 0.4716, + "step": 3465 + }, + { + "epoch": 2.3, + "learning_rate": 7.428017895220076e-06, + "loss": 0.4587, + "step": 3466 + }, + { + "epoch": 2.3, + "learning_rate": 7.42289868590367e-06, + "loss": 0.4538, + "step": 3467 + }, + { + "epoch": 2.3, + "learning_rate": 7.4177801998928864e-06, + "loss": 0.4278, + "step": 3468 + }, + { + "epoch": 2.3, + "learning_rate": 7.412662438624316e-06, + "loss": 0.4138, + "step": 3469 + }, + { + "epoch": 2.3, + "learning_rate": 7.407545403534334e-06, + "loss": 0.4657, + "step": 3470 + }, + { + "epoch": 2.3, + "learning_rate": 7.402429096059125e-06, + "loss": 0.4418, + "step": 3471 + }, + { + "epoch": 2.3, + "learning_rate": 7.397313517634659e-06, + "loss": 0.4446, + "step": 3472 + }, + { + "epoch": 2.3, + "learning_rate": 7.392198669696715e-06, + "loss": 0.4365, + "step": 3473 + }, + { + "epoch": 2.3, + "learning_rate": 7.387084553680859e-06, + "loss": 0.4334, + "step": 3474 + }, + { + "epoch": 2.3, + "learning_rate": 7.381971171022442e-06, + "loss": 0.4196, + "step": 3475 + }, + { + "epoch": 2.3, + "learning_rate": 7.376858523156624e-06, + "loss": 0.4396, + "step": 3476 + }, + { + "epoch": 2.31, + "learning_rate": 7.371746611518352e-06, + "loss": 0.4371, + "step": 3477 + }, + { + "epoch": 2.31, + "learning_rate": 7.366635437542368e-06, + "loss": 0.436, + "step": 3478 + }, + { + "epoch": 2.31, + "learning_rate": 7.361525002663209e-06, + "loss": 0.412, + "step": 3479 + }, + { + "epoch": 2.31, + "learning_rate": 7.356415308315201e-06, + "loss": 0.4762, + "step": 3480 + }, + { + "epoch": 2.31, + "learning_rate": 7.35130635593246e-06, + "loss": 0.4307, + "step": 3481 + }, + { + "epoch": 2.31, + "learning_rate": 7.3461981469489e-06, + "loss": 0.4331, + "step": 3482 + }, + { + "epoch": 2.31, + "learning_rate": 7.341090682798222e-06, + "loss": 0.4192, + "step": 3483 + }, + { + "epoch": 2.31, + "learning_rate": 7.335983964913919e-06, + "loss": 0.4222, + "step": 3484 + }, + { + "epoch": 2.31, + "learning_rate": 7.3308779947292776e-06, + "loss": 0.4051, + "step": 3485 + }, + { + "epoch": 2.31, + "learning_rate": 7.325772773677372e-06, + "loss": 0.4489, + "step": 3486 + }, + { + "epoch": 2.31, + "learning_rate": 7.320668303191062e-06, + "loss": 0.4374, + "step": 3487 + }, + { + "epoch": 2.31, + "learning_rate": 7.315564584703002e-06, + "loss": 0.4563, + "step": 3488 + }, + { + "epoch": 2.31, + "learning_rate": 7.310461619645634e-06, + "loss": 0.4302, + "step": 3489 + }, + { + "epoch": 2.31, + "learning_rate": 7.305359409451192e-06, + "loss": 0.4415, + "step": 3490 + }, + { + "epoch": 2.31, + "learning_rate": 7.300257955551691e-06, + "loss": 0.4531, + "step": 3491 + }, + { + "epoch": 2.32, + "learning_rate": 7.29515725937894e-06, + "loss": 0.4458, + "step": 3492 + }, + { + "epoch": 2.32, + "learning_rate": 7.29005732236453e-06, + "loss": 0.4209, + "step": 3493 + }, + { + "epoch": 2.32, + "learning_rate": 7.284958145939842e-06, + "loss": 0.4142, + "step": 3494 + }, + { + "epoch": 2.32, + "learning_rate": 7.279859731536045e-06, + "loss": 0.4405, + "step": 3495 + }, + { + "epoch": 2.32, + "learning_rate": 7.274762080584091e-06, + "loss": 0.4735, + "step": 3496 + }, + { + "epoch": 2.32, + "learning_rate": 7.269665194514721e-06, + "loss": 0.4689, + "step": 3497 + }, + { + "epoch": 2.32, + "learning_rate": 7.26456907475846e-06, + "loss": 0.4187, + "step": 3498 + }, + { + "epoch": 2.32, + "learning_rate": 7.2594737227456125e-06, + "loss": 0.4564, + "step": 3499 + }, + { + "epoch": 2.32, + "learning_rate": 7.2543791399062755e-06, + "loss": 0.4354, + "step": 3500 + }, + { + "epoch": 2.32, + "learning_rate": 7.2492853276703275e-06, + "loss": 0.4134, + "step": 3501 + }, + { + "epoch": 2.32, + "learning_rate": 7.244192287467429e-06, + "loss": 0.4516, + "step": 3502 + }, + { + "epoch": 2.32, + "learning_rate": 7.23910002072703e-06, + "loss": 0.489, + "step": 3503 + }, + { + "epoch": 2.32, + "learning_rate": 7.2340085288783504e-06, + "loss": 0.4368, + "step": 3504 + }, + { + "epoch": 2.32, + "learning_rate": 7.228917813350404e-06, + "loss": 0.45, + "step": 3505 + }, + { + "epoch": 2.33, + "learning_rate": 7.223827875571981e-06, + "loss": 0.4866, + "step": 3506 + }, + { + "epoch": 2.33, + "learning_rate": 7.21873871697166e-06, + "loss": 0.4565, + "step": 3507 + }, + { + "epoch": 2.33, + "learning_rate": 7.213650338977795e-06, + "loss": 0.4598, + "step": 3508 + }, + { + "epoch": 2.33, + "learning_rate": 7.208562743018525e-06, + "loss": 0.4587, + "step": 3509 + }, + { + "epoch": 2.33, + "learning_rate": 7.203475930521764e-06, + "loss": 0.4419, + "step": 3510 + }, + { + "epoch": 2.33, + "learning_rate": 7.198389902915206e-06, + "loss": 0.4119, + "step": 3511 + }, + { + "epoch": 2.33, + "learning_rate": 7.193304661626333e-06, + "loss": 0.4405, + "step": 3512 + }, + { + "epoch": 2.33, + "learning_rate": 7.188220208082398e-06, + "loss": 0.4706, + "step": 3513 + }, + { + "epoch": 2.33, + "learning_rate": 7.183136543710436e-06, + "loss": 0.4529, + "step": 3514 + }, + { + "epoch": 2.33, + "learning_rate": 7.1780536699372685e-06, + "loss": 0.4359, + "step": 3515 + }, + { + "epoch": 2.33, + "learning_rate": 7.172971588189475e-06, + "loss": 0.4417, + "step": 3516 + }, + { + "epoch": 2.33, + "learning_rate": 7.167890299893432e-06, + "loss": 0.4259, + "step": 3517 + }, + { + "epoch": 2.33, + "learning_rate": 7.162809806475283e-06, + "loss": 0.4407, + "step": 3518 + }, + { + "epoch": 2.33, + "learning_rate": 7.157730109360954e-06, + "loss": 0.4335, + "step": 3519 + }, + { + "epoch": 2.33, + "learning_rate": 7.1526512099761424e-06, + "loss": 0.4171, + "step": 3520 + }, + { + "epoch": 2.34, + "learning_rate": 7.147573109746331e-06, + "loss": 0.4404, + "step": 3521 + }, + { + "epoch": 2.34, + "learning_rate": 7.142495810096762e-06, + "loss": 0.4391, + "step": 3522 + }, + { + "epoch": 2.34, + "learning_rate": 7.137419312452469e-06, + "loss": 0.4373, + "step": 3523 + }, + { + "epoch": 2.34, + "learning_rate": 7.132343618238251e-06, + "loss": 0.4633, + "step": 3524 + }, + { + "epoch": 2.34, + "learning_rate": 7.127268728878687e-06, + "loss": 0.4483, + "step": 3525 + }, + { + "epoch": 2.34, + "learning_rate": 7.122194645798128e-06, + "loss": 0.3956, + "step": 3526 + }, + { + "epoch": 2.34, + "learning_rate": 7.117121370420695e-06, + "loss": 0.412, + "step": 3527 + }, + { + "epoch": 2.34, + "learning_rate": 7.112048904170288e-06, + "loss": 0.4419, + "step": 3528 + }, + { + "epoch": 2.34, + "learning_rate": 7.106977248470577e-06, + "loss": 0.4381, + "step": 3529 + }, + { + "epoch": 2.34, + "learning_rate": 7.101906404745006e-06, + "loss": 0.4403, + "step": 3530 + }, + { + "epoch": 2.34, + "learning_rate": 7.096836374416789e-06, + "loss": 0.4569, + "step": 3531 + }, + { + "epoch": 2.34, + "learning_rate": 7.091767158908916e-06, + "loss": 0.4104, + "step": 3532 + }, + { + "epoch": 2.34, + "learning_rate": 7.0866987596441394e-06, + "loss": 0.4476, + "step": 3533 + }, + { + "epoch": 2.34, + "learning_rate": 7.081631178044992e-06, + "loss": 0.4346, + "step": 3534 + }, + { + "epoch": 2.34, + "learning_rate": 7.076564415533774e-06, + "loss": 0.4343, + "step": 3535 + }, + { + "epoch": 2.35, + "learning_rate": 7.071498473532554e-06, + "loss": 0.4373, + "step": 3536 + }, + { + "epoch": 2.35, + "learning_rate": 7.066433353463171e-06, + "loss": 0.452, + "step": 3537 + }, + { + "epoch": 2.35, + "learning_rate": 7.061369056747239e-06, + "loss": 0.4414, + "step": 3538 + }, + { + "epoch": 2.35, + "learning_rate": 7.056305584806127e-06, + "loss": 0.4686, + "step": 3539 + }, + { + "epoch": 2.35, + "learning_rate": 7.0512429390609825e-06, + "loss": 0.4706, + "step": 3540 + }, + { + "epoch": 2.35, + "learning_rate": 7.046181120932726e-06, + "loss": 0.4547, + "step": 3541 + }, + { + "epoch": 2.35, + "learning_rate": 7.041120131842035e-06, + "loss": 0.4455, + "step": 3542 + }, + { + "epoch": 2.35, + "learning_rate": 7.03605997320936e-06, + "loss": 0.4433, + "step": 3543 + }, + { + "epoch": 2.35, + "learning_rate": 7.031000646454922e-06, + "loss": 0.4661, + "step": 3544 + }, + { + "epoch": 2.35, + "learning_rate": 7.0259421529986946e-06, + "loss": 0.4327, + "step": 3545 + }, + { + "epoch": 2.35, + "learning_rate": 7.020884494260431e-06, + "loss": 0.4314, + "step": 3546 + }, + { + "epoch": 2.35, + "learning_rate": 7.015827671659647e-06, + "loss": 0.4583, + "step": 3547 + }, + { + "epoch": 2.35, + "learning_rate": 7.010771686615618e-06, + "loss": 0.4411, + "step": 3548 + }, + { + "epoch": 2.35, + "learning_rate": 7.005716540547399e-06, + "loss": 0.4061, + "step": 3549 + }, + { + "epoch": 2.35, + "learning_rate": 7.00066223487379e-06, + "loss": 0.4522, + "step": 3550 + }, + { + "epoch": 2.36, + "learning_rate": 6.995608771013365e-06, + "loss": 0.4227, + "step": 3551 + }, + { + "epoch": 2.36, + "learning_rate": 6.990556150384464e-06, + "loss": 0.426, + "step": 3552 + }, + { + "epoch": 2.36, + "learning_rate": 6.985504374405188e-06, + "loss": 0.4526, + "step": 3553 + }, + { + "epoch": 2.36, + "learning_rate": 6.9804534444934005e-06, + "loss": 0.4475, + "step": 3554 + }, + { + "epoch": 2.36, + "learning_rate": 6.975403362066727e-06, + "loss": 0.4351, + "step": 3555 + }, + { + "epoch": 2.36, + "learning_rate": 6.970354128542553e-06, + "loss": 0.4514, + "step": 3556 + }, + { + "epoch": 2.36, + "learning_rate": 6.965305745338033e-06, + "loss": 0.4737, + "step": 3557 + }, + { + "epoch": 2.36, + "learning_rate": 6.960258213870075e-06, + "loss": 0.4709, + "step": 3558 + }, + { + "epoch": 2.36, + "learning_rate": 6.955211535555353e-06, + "loss": 0.4494, + "step": 3559 + }, + { + "epoch": 2.36, + "learning_rate": 6.9501657118102994e-06, + "loss": 0.4359, + "step": 3560 + }, + { + "epoch": 2.36, + "learning_rate": 6.945120744051108e-06, + "loss": 0.4241, + "step": 3561 + }, + { + "epoch": 2.36, + "learning_rate": 6.9400766336937286e-06, + "loss": 0.4718, + "step": 3562 + }, + { + "epoch": 2.36, + "learning_rate": 6.935033382153875e-06, + "loss": 0.4306, + "step": 3563 + }, + { + "epoch": 2.36, + "learning_rate": 6.92999099084702e-06, + "loss": 0.4353, + "step": 3564 + }, + { + "epoch": 2.36, + "learning_rate": 6.92494946118839e-06, + "loss": 0.449, + "step": 3565 + }, + { + "epoch": 2.37, + "learning_rate": 6.919908794592973e-06, + "loss": 0.4529, + "step": 3566 + }, + { + "epoch": 2.37, + "learning_rate": 6.914868992475521e-06, + "loss": 0.445, + "step": 3567 + }, + { + "epoch": 2.37, + "learning_rate": 6.909830056250527e-06, + "loss": 0.459, + "step": 3568 + }, + { + "epoch": 2.37, + "learning_rate": 6.904791987332256e-06, + "loss": 0.4115, + "step": 3569 + }, + { + "epoch": 2.37, + "learning_rate": 6.899754787134725e-06, + "loss": 0.4252, + "step": 3570 + }, + { + "epoch": 2.37, + "learning_rate": 6.894718457071703e-06, + "loss": 0.4236, + "step": 3571 + }, + { + "epoch": 2.37, + "learning_rate": 6.889682998556724e-06, + "loss": 0.4085, + "step": 3572 + }, + { + "epoch": 2.37, + "learning_rate": 6.88464841300307e-06, + "loss": 0.4205, + "step": 3573 + }, + { + "epoch": 2.37, + "learning_rate": 6.8796147018237734e-06, + "loss": 0.421, + "step": 3574 + }, + { + "epoch": 2.37, + "learning_rate": 6.874581866431633e-06, + "loss": 0.4488, + "step": 3575 + }, + { + "epoch": 2.37, + "learning_rate": 6.869549908239198e-06, + "loss": 0.4126, + "step": 3576 + }, + { + "epoch": 2.37, + "learning_rate": 6.864518828658767e-06, + "loss": 0.4676, + "step": 3577 + }, + { + "epoch": 2.37, + "learning_rate": 6.8594886291024e-06, + "loss": 0.4283, + "step": 3578 + }, + { + "epoch": 2.37, + "learning_rate": 6.854459310981894e-06, + "loss": 0.4777, + "step": 3579 + }, + { + "epoch": 2.37, + "learning_rate": 6.849430875708818e-06, + "loss": 0.4633, + "step": 3580 + }, + { + "epoch": 2.38, + "learning_rate": 6.84440332469448e-06, + "loss": 0.4546, + "step": 3581 + }, + { + "epoch": 2.38, + "learning_rate": 6.839376659349945e-06, + "loss": 0.4471, + "step": 3582 + }, + { + "epoch": 2.38, + "learning_rate": 6.8343508810860325e-06, + "loss": 0.4466, + "step": 3583 + }, + { + "epoch": 2.38, + "learning_rate": 6.8293259913133115e-06, + "loss": 0.4365, + "step": 3584 + }, + { + "epoch": 2.38, + "learning_rate": 6.82430199144209e-06, + "loss": 0.4517, + "step": 3585 + }, + { + "epoch": 2.38, + "learning_rate": 6.819278882882442e-06, + "loss": 0.4448, + "step": 3586 + }, + { + "epoch": 2.38, + "learning_rate": 6.8142566670441835e-06, + "loss": 0.4237, + "step": 3587 + }, + { + "epoch": 2.38, + "learning_rate": 6.809235345336882e-06, + "loss": 0.4277, + "step": 3588 + }, + { + "epoch": 2.38, + "learning_rate": 6.8042149191698526e-06, + "loss": 0.492, + "step": 3589 + }, + { + "epoch": 2.38, + "learning_rate": 6.799195389952163e-06, + "loss": 0.4245, + "step": 3590 + }, + { + "epoch": 2.38, + "learning_rate": 6.794176759092622e-06, + "loss": 0.4467, + "step": 3591 + }, + { + "epoch": 2.38, + "learning_rate": 6.789159027999792e-06, + "loss": 0.4284, + "step": 3592 + }, + { + "epoch": 2.38, + "learning_rate": 6.784142198081983e-06, + "loss": 0.4248, + "step": 3593 + }, + { + "epoch": 2.38, + "learning_rate": 6.779126270747247e-06, + "loss": 0.4149, + "step": 3594 + }, + { + "epoch": 2.38, + "learning_rate": 6.77411124740339e-06, + "loss": 0.4378, + "step": 3595 + }, + { + "epoch": 2.39, + "learning_rate": 6.7690971294579596e-06, + "loss": 0.4382, + "step": 3596 + }, + { + "epoch": 2.39, + "learning_rate": 6.764083918318246e-06, + "loss": 0.4614, + "step": 3597 + }, + { + "epoch": 2.39, + "learning_rate": 6.759071615391293e-06, + "loss": 0.4236, + "step": 3598 + }, + { + "epoch": 2.39, + "learning_rate": 6.754060222083883e-06, + "loss": 0.4642, + "step": 3599 + }, + { + "epoch": 2.39, + "learning_rate": 6.7490497398025444e-06, + "loss": 0.4343, + "step": 3600 + }, + { + "epoch": 2.39, + "learning_rate": 6.744040169953559e-06, + "loss": 0.462, + "step": 3601 + }, + { + "epoch": 2.39, + "learning_rate": 6.739031513942933e-06, + "loss": 0.4474, + "step": 3602 + }, + { + "epoch": 2.39, + "learning_rate": 6.734023773176434e-06, + "loss": 0.4472, + "step": 3603 + }, + { + "epoch": 2.39, + "learning_rate": 6.729016949059566e-06, + "loss": 0.4427, + "step": 3604 + }, + { + "epoch": 2.39, + "learning_rate": 6.724011042997576e-06, + "loss": 0.4836, + "step": 3605 + }, + { + "epoch": 2.39, + "learning_rate": 6.719006056395452e-06, + "loss": 0.4896, + "step": 3606 + }, + { + "epoch": 2.39, + "learning_rate": 6.7140019906579305e-06, + "loss": 0.4458, + "step": 3607 + }, + { + "epoch": 2.39, + "learning_rate": 6.708998847189474e-06, + "loss": 0.4098, + "step": 3608 + }, + { + "epoch": 2.39, + "learning_rate": 6.703996627394303e-06, + "loss": 0.4333, + "step": 3609 + }, + { + "epoch": 2.4, + "learning_rate": 6.698995332676375e-06, + "loss": 0.4708, + "step": 3610 + }, + { + "epoch": 2.4, + "learning_rate": 6.693994964439379e-06, + "loss": 0.4483, + "step": 3611 + }, + { + "epoch": 2.4, + "learning_rate": 6.688995524086754e-06, + "loss": 0.4463, + "step": 3612 + }, + { + "epoch": 2.4, + "learning_rate": 6.683997013021679e-06, + "loss": 0.4482, + "step": 3613 + }, + { + "epoch": 2.4, + "learning_rate": 6.678999432647057e-06, + "loss": 0.4296, + "step": 3614 + }, + { + "epoch": 2.4, + "learning_rate": 6.674002784365547e-06, + "loss": 0.4126, + "step": 3615 + }, + { + "epoch": 2.4, + "learning_rate": 6.669007069579538e-06, + "loss": 0.4426, + "step": 3616 + }, + { + "epoch": 2.4, + "learning_rate": 6.664012289691162e-06, + "loss": 0.4256, + "step": 3617 + }, + { + "epoch": 2.4, + "learning_rate": 6.659018446102283e-06, + "loss": 0.4596, + "step": 3618 + }, + { + "epoch": 2.4, + "learning_rate": 6.65402554021451e-06, + "loss": 0.4325, + "step": 3619 + }, + { + "epoch": 2.4, + "learning_rate": 6.649033573429178e-06, + "loss": 0.4374, + "step": 3620 + }, + { + "epoch": 2.4, + "learning_rate": 6.644042547147362e-06, + "loss": 0.4303, + "step": 3621 + }, + { + "epoch": 2.4, + "learning_rate": 6.639052462769881e-06, + "loss": 0.4271, + "step": 3622 + }, + { + "epoch": 2.4, + "learning_rate": 6.634063321697282e-06, + "loss": 0.403, + "step": 3623 + }, + { + "epoch": 2.4, + "learning_rate": 6.629075125329849e-06, + "loss": 0.428, + "step": 3624 + }, + { + "epoch": 2.41, + "learning_rate": 6.6240878750676e-06, + "loss": 0.4293, + "step": 3625 + }, + { + "epoch": 2.41, + "learning_rate": 6.619101572310287e-06, + "loss": 0.4489, + "step": 3626 + }, + { + "epoch": 2.41, + "learning_rate": 6.6141162184574e-06, + "loss": 0.4596, + "step": 3627 + }, + { + "epoch": 2.41, + "learning_rate": 6.6091318149081595e-06, + "loss": 0.4322, + "step": 3628 + }, + { + "epoch": 2.41, + "learning_rate": 6.60414836306152e-06, + "loss": 0.442, + "step": 3629 + }, + { + "epoch": 2.41, + "learning_rate": 6.5991658643161696e-06, + "loss": 0.4025, + "step": 3630 + }, + { + "epoch": 2.41, + "learning_rate": 6.594184320070525e-06, + "loss": 0.4346, + "step": 3631 + }, + { + "epoch": 2.41, + "learning_rate": 6.58920373172274e-06, + "loss": 0.4604, + "step": 3632 + }, + { + "epoch": 2.41, + "learning_rate": 6.584224100670698e-06, + "loss": 0.4403, + "step": 3633 + }, + { + "epoch": 2.41, + "learning_rate": 6.5792454283120136e-06, + "loss": 0.4435, + "step": 3634 + }, + { + "epoch": 2.41, + "learning_rate": 6.574267716044033e-06, + "loss": 0.431, + "step": 3635 + }, + { + "epoch": 2.41, + "learning_rate": 6.569290965263835e-06, + "loss": 0.4122, + "step": 3636 + }, + { + "epoch": 2.41, + "learning_rate": 6.564315177368222e-06, + "loss": 0.4402, + "step": 3637 + }, + { + "epoch": 2.41, + "learning_rate": 6.559340353753729e-06, + "loss": 0.4413, + "step": 3638 + }, + { + "epoch": 2.41, + "learning_rate": 6.5543664958166266e-06, + "loss": 0.4892, + "step": 3639 + }, + { + "epoch": 2.42, + "learning_rate": 6.549393604952906e-06, + "loss": 0.4502, + "step": 3640 + }, + { + "epoch": 2.42, + "learning_rate": 6.544421682558293e-06, + "loss": 0.4277, + "step": 3641 + }, + { + "epoch": 2.42, + "learning_rate": 6.539450730028238e-06, + "loss": 0.4171, + "step": 3642 + }, + { + "epoch": 2.42, + "learning_rate": 6.534480748757917e-06, + "loss": 0.4482, + "step": 3643 + }, + { + "epoch": 2.42, + "learning_rate": 6.52951174014224e-06, + "loss": 0.4598, + "step": 3644 + }, + { + "epoch": 2.42, + "learning_rate": 6.524543705575839e-06, + "loss": 0.4523, + "step": 3645 + }, + { + "epoch": 2.42, + "learning_rate": 6.519576646453075e-06, + "loss": 0.4233, + "step": 3646 + }, + { + "epoch": 2.42, + "learning_rate": 6.514610564168034e-06, + "loss": 0.4775, + "step": 3647 + }, + { + "epoch": 2.42, + "learning_rate": 6.509645460114532e-06, + "loss": 0.418, + "step": 3648 + }, + { + "epoch": 2.42, + "learning_rate": 6.504681335686098e-06, + "loss": 0.4211, + "step": 3649 + }, + { + "epoch": 2.42, + "learning_rate": 6.499718192275999e-06, + "loss": 0.42, + "step": 3650 + }, + { + "epoch": 2.42, + "learning_rate": 6.494756031277224e-06, + "loss": 0.485, + "step": 3651 + }, + { + "epoch": 2.42, + "learning_rate": 6.489794854082483e-06, + "loss": 0.4125, + "step": 3652 + }, + { + "epoch": 2.42, + "learning_rate": 6.484834662084217e-06, + "loss": 0.4401, + "step": 3653 + }, + { + "epoch": 2.42, + "learning_rate": 6.479875456674576e-06, + "loss": 0.4453, + "step": 3654 + }, + { + "epoch": 2.43, + "learning_rate": 6.474917239245445e-06, + "loss": 0.4371, + "step": 3655 + }, + { + "epoch": 2.43, + "learning_rate": 6.469960011188431e-06, + "loss": 0.4473, + "step": 3656 + }, + { + "epoch": 2.43, + "learning_rate": 6.465003773894859e-06, + "loss": 0.4259, + "step": 3657 + }, + { + "epoch": 2.43, + "learning_rate": 6.460048528755778e-06, + "loss": 0.4485, + "step": 3658 + }, + { + "epoch": 2.43, + "learning_rate": 6.455094277161964e-06, + "loss": 0.4598, + "step": 3659 + }, + { + "epoch": 2.43, + "learning_rate": 6.450141020503902e-06, + "loss": 0.4259, + "step": 3660 + }, + { + "epoch": 2.43, + "learning_rate": 6.445188760171806e-06, + "loss": 0.4785, + "step": 3661 + }, + { + "epoch": 2.43, + "learning_rate": 6.4402374975556085e-06, + "loss": 0.4534, + "step": 3662 + }, + { + "epoch": 2.43, + "learning_rate": 6.435287234044965e-06, + "loss": 0.45, + "step": 3663 + }, + { + "epoch": 2.43, + "learning_rate": 6.4303379710292445e-06, + "loss": 0.4328, + "step": 3664 + }, + { + "epoch": 2.43, + "learning_rate": 6.425389709897543e-06, + "loss": 0.4413, + "step": 3665 + }, + { + "epoch": 2.43, + "learning_rate": 6.420442452038668e-06, + "loss": 0.3978, + "step": 3666 + }, + { + "epoch": 2.43, + "learning_rate": 6.415496198841147e-06, + "loss": 0.424, + "step": 3667 + }, + { + "epoch": 2.43, + "learning_rate": 6.410550951693228e-06, + "loss": 0.4175, + "step": 3668 + }, + { + "epoch": 2.43, + "learning_rate": 6.405606711982877e-06, + "loss": 0.4138, + "step": 3669 + }, + { + "epoch": 2.44, + "learning_rate": 6.400663481097774e-06, + "loss": 0.4631, + "step": 3670 + }, + { + "epoch": 2.44, + "learning_rate": 6.395721260425321e-06, + "loss": 0.4291, + "step": 3671 + }, + { + "epoch": 2.44, + "learning_rate": 6.390780051352627e-06, + "loss": 0.4527, + "step": 3672 + }, + { + "epoch": 2.44, + "learning_rate": 6.385839855266527e-06, + "loss": 0.4537, + "step": 3673 + }, + { + "epoch": 2.44, + "learning_rate": 6.380900673553567e-06, + "loss": 0.421, + "step": 3674 + }, + { + "epoch": 2.44, + "learning_rate": 6.375962507600009e-06, + "loss": 0.4492, + "step": 3675 + }, + { + "epoch": 2.44, + "learning_rate": 6.371025358791834e-06, + "loss": 0.5156, + "step": 3676 + }, + { + "epoch": 2.44, + "learning_rate": 6.366089228514727e-06, + "loss": 0.4256, + "step": 3677 + }, + { + "epoch": 2.44, + "learning_rate": 6.361154118154096e-06, + "loss": 0.4096, + "step": 3678 + }, + { + "epoch": 2.44, + "learning_rate": 6.356220029095061e-06, + "loss": 0.4158, + "step": 3679 + }, + { + "epoch": 2.44, + "learning_rate": 6.3512869627224535e-06, + "loss": 0.4658, + "step": 3680 + }, + { + "epoch": 2.44, + "learning_rate": 6.346354920420822e-06, + "loss": 0.4171, + "step": 3681 + }, + { + "epoch": 2.44, + "learning_rate": 6.341423903574426e-06, + "loss": 0.4592, + "step": 3682 + }, + { + "epoch": 2.44, + "learning_rate": 6.33649391356723e-06, + "loss": 0.4661, + "step": 3683 + }, + { + "epoch": 2.44, + "learning_rate": 6.331564951782918e-06, + "loss": 0.4568, + "step": 3684 + }, + { + "epoch": 2.45, + "learning_rate": 6.326637019604888e-06, + "loss": 0.399, + "step": 3685 + }, + { + "epoch": 2.45, + "learning_rate": 6.32171011841624e-06, + "loss": 0.4534, + "step": 3686 + }, + { + "epoch": 2.45, + "learning_rate": 6.316784249599792e-06, + "loss": 0.4481, + "step": 3687 + }, + { + "epoch": 2.45, + "learning_rate": 6.311859414538075e-06, + "loss": 0.4416, + "step": 3688 + }, + { + "epoch": 2.45, + "learning_rate": 6.306935614613312e-06, + "loss": 0.4382, + "step": 3689 + }, + { + "epoch": 2.45, + "learning_rate": 6.302012851207455e-06, + "loss": 0.4625, + "step": 3690 + }, + { + "epoch": 2.45, + "learning_rate": 6.297091125702157e-06, + "loss": 0.4316, + "step": 3691 + }, + { + "epoch": 2.45, + "learning_rate": 6.292170439478782e-06, + "loss": 0.4556, + "step": 3692 + }, + { + "epoch": 2.45, + "learning_rate": 6.2872507939184e-06, + "loss": 0.4212, + "step": 3693 + }, + { + "epoch": 2.45, + "learning_rate": 6.282332190401794e-06, + "loss": 0.4267, + "step": 3694 + }, + { + "epoch": 2.45, + "learning_rate": 6.277414630309444e-06, + "loss": 0.4399, + "step": 3695 + }, + { + "epoch": 2.45, + "learning_rate": 6.272498115021546e-06, + "loss": 0.4302, + "step": 3696 + }, + { + "epoch": 2.45, + "learning_rate": 6.267582645918001e-06, + "loss": 0.4348, + "step": 3697 + }, + { + "epoch": 2.45, + "learning_rate": 6.2626682243784155e-06, + "loss": 0.4467, + "step": 3698 + }, + { + "epoch": 2.46, + "learning_rate": 6.257754851782104e-06, + "loss": 0.4496, + "step": 3699 + }, + { + "epoch": 2.46, + "learning_rate": 6.252842529508081e-06, + "loss": 0.4466, + "step": 3700 + }, + { + "epoch": 2.46, + "learning_rate": 6.247931258935074e-06, + "loss": 0.4406, + "step": 3701 + }, + { + "epoch": 2.46, + "learning_rate": 6.243021041441508e-06, + "loss": 0.456, + "step": 3702 + }, + { + "epoch": 2.46, + "learning_rate": 6.238111878405518e-06, + "loss": 0.4616, + "step": 3703 + }, + { + "epoch": 2.46, + "learning_rate": 6.23320377120494e-06, + "loss": 0.3853, + "step": 3704 + }, + { + "epoch": 2.46, + "learning_rate": 6.228296721217317e-06, + "loss": 0.4379, + "step": 3705 + }, + { + "epoch": 2.46, + "learning_rate": 6.223390729819889e-06, + "loss": 0.4125, + "step": 3706 + }, + { + "epoch": 2.46, + "learning_rate": 6.218485798389604e-06, + "loss": 0.414, + "step": 3707 + }, + { + "epoch": 2.46, + "learning_rate": 6.213581928303112e-06, + "loss": 0.4495, + "step": 3708 + }, + { + "epoch": 2.46, + "learning_rate": 6.208679120936765e-06, + "loss": 0.4467, + "step": 3709 + }, + { + "epoch": 2.46, + "learning_rate": 6.2037773776666134e-06, + "loss": 0.4587, + "step": 3710 + }, + { + "epoch": 2.46, + "learning_rate": 6.198876699868415e-06, + "loss": 0.4626, + "step": 3711 + }, + { + "epoch": 2.46, + "learning_rate": 6.193977088917622e-06, + "loss": 0.4559, + "step": 3712 + }, + { + "epoch": 2.46, + "learning_rate": 6.189078546189393e-06, + "loss": 0.4426, + "step": 3713 + }, + { + "epoch": 2.47, + "learning_rate": 6.184181073058582e-06, + "loss": 0.4434, + "step": 3714 + }, + { + "epoch": 2.47, + "learning_rate": 6.179284670899745e-06, + "loss": 0.4431, + "step": 3715 + }, + { + "epoch": 2.47, + "learning_rate": 6.174389341087138e-06, + "loss": 0.4622, + "step": 3716 + }, + { + "epoch": 2.47, + "learning_rate": 6.1694950849947186e-06, + "loss": 0.4141, + "step": 3717 + }, + { + "epoch": 2.47, + "learning_rate": 6.164601903996134e-06, + "loss": 0.4155, + "step": 3718 + }, + { + "epoch": 2.47, + "learning_rate": 6.159709799464737e-06, + "loss": 0.434, + "step": 3719 + }, + { + "epoch": 2.47, + "learning_rate": 6.154818772773579e-06, + "loss": 0.4769, + "step": 3720 + }, + { + "epoch": 2.47, + "learning_rate": 6.149928825295406e-06, + "loss": 0.4362, + "step": 3721 + }, + { + "epoch": 2.47, + "learning_rate": 6.14503995840266e-06, + "loss": 0.4318, + "step": 3722 + }, + { + "epoch": 2.47, + "learning_rate": 6.140152173467486e-06, + "loss": 0.4583, + "step": 3723 + }, + { + "epoch": 2.47, + "learning_rate": 6.1352654718617156e-06, + "loss": 0.4347, + "step": 3724 + }, + { + "epoch": 2.47, + "learning_rate": 6.130379854956879e-06, + "loss": 0.4383, + "step": 3725 + }, + { + "epoch": 2.47, + "learning_rate": 6.125495324124212e-06, + "loss": 0.4309, + "step": 3726 + }, + { + "epoch": 2.47, + "learning_rate": 6.120611880734635e-06, + "loss": 0.4379, + "step": 3727 + }, + { + "epoch": 2.47, + "learning_rate": 6.115729526158769e-06, + "loss": 0.4644, + "step": 3728 + }, + { + "epoch": 2.48, + "learning_rate": 6.110848261766919e-06, + "loss": 0.418, + "step": 3729 + }, + { + "epoch": 2.48, + "learning_rate": 6.105968088929098e-06, + "loss": 0.4124, + "step": 3730 + }, + { + "epoch": 2.48, + "learning_rate": 6.1010890090150045e-06, + "loss": 0.4599, + "step": 3731 + }, + { + "epoch": 2.48, + "learning_rate": 6.0962110233940304e-06, + "loss": 0.4161, + "step": 3732 + }, + { + "epoch": 2.48, + "learning_rate": 6.091334133435263e-06, + "loss": 0.4202, + "step": 3733 + }, + { + "epoch": 2.48, + "learning_rate": 6.086458340507488e-06, + "loss": 0.4018, + "step": 3734 + }, + { + "epoch": 2.48, + "learning_rate": 6.081583645979168e-06, + "loss": 0.4603, + "step": 3735 + }, + { + "epoch": 2.48, + "learning_rate": 6.076710051218467e-06, + "loss": 0.4286, + "step": 3736 + }, + { + "epoch": 2.48, + "learning_rate": 6.071837557593239e-06, + "loss": 0.4514, + "step": 3737 + }, + { + "epoch": 2.48, + "learning_rate": 6.066966166471031e-06, + "loss": 0.4489, + "step": 3738 + }, + { + "epoch": 2.48, + "learning_rate": 6.062095879219079e-06, + "loss": 0.4607, + "step": 3739 + }, + { + "epoch": 2.48, + "learning_rate": 6.057226697204308e-06, + "loss": 0.4171, + "step": 3740 + }, + { + "epoch": 2.48, + "learning_rate": 6.052358621793333e-06, + "loss": 0.4348, + "step": 3741 + }, + { + "epoch": 2.48, + "learning_rate": 6.047491654352458e-06, + "loss": 0.4199, + "step": 3742 + }, + { + "epoch": 2.48, + "learning_rate": 6.042625796247678e-06, + "loss": 0.4575, + "step": 3743 + }, + { + "epoch": 2.49, + "learning_rate": 6.037761048844675e-06, + "loss": 0.4436, + "step": 3744 + }, + { + "epoch": 2.49, + "learning_rate": 6.032897413508822e-06, + "loss": 0.4454, + "step": 3745 + }, + { + "epoch": 2.49, + "learning_rate": 6.028034891605179e-06, + "loss": 0.4889, + "step": 3746 + }, + { + "epoch": 2.49, + "learning_rate": 6.023173484498486e-06, + "loss": 0.4478, + "step": 3747 + }, + { + "epoch": 2.49, + "learning_rate": 6.018313193553181e-06, + "loss": 0.413, + "step": 3748 + }, + { + "epoch": 2.49, + "learning_rate": 6.013454020133382e-06, + "loss": 0.4513, + "step": 3749 + }, + { + "epoch": 2.49, + "learning_rate": 6.0085959656028994e-06, + "loss": 0.4412, + "step": 3750 + }, + { + "epoch": 2.49, + "learning_rate": 6.003739031325223e-06, + "loss": 0.4786, + "step": 3751 + }, + { + "epoch": 2.49, + "learning_rate": 5.998883218663529e-06, + "loss": 0.4556, + "step": 3752 + }, + { + "epoch": 2.49, + "learning_rate": 5.994028528980682e-06, + "loss": 0.4328, + "step": 3753 + }, + { + "epoch": 2.49, + "learning_rate": 5.989174963639231e-06, + "loss": 0.4722, + "step": 3754 + }, + { + "epoch": 2.49, + "learning_rate": 5.984322524001409e-06, + "loss": 0.4108, + "step": 3755 + }, + { + "epoch": 2.49, + "learning_rate": 5.97947121142913e-06, + "loss": 0.4435, + "step": 3756 + }, + { + "epoch": 2.49, + "learning_rate": 5.974621027284e-06, + "loss": 0.4526, + "step": 3757 + }, + { + "epoch": 2.49, + "learning_rate": 5.969771972927294e-06, + "loss": 0.451, + "step": 3758 + }, + { + "epoch": 2.5, + "learning_rate": 5.96492404971998e-06, + "loss": 0.4491, + "step": 3759 + }, + { + "epoch": 2.5, + "learning_rate": 5.960077259022713e-06, + "loss": 0.4062, + "step": 3760 + }, + { + "epoch": 2.5, + "learning_rate": 5.955231602195819e-06, + "loss": 0.4243, + "step": 3761 + }, + { + "epoch": 2.5, + "learning_rate": 5.9503870805993135e-06, + "loss": 0.4772, + "step": 3762 + }, + { + "epoch": 2.5, + "learning_rate": 5.9455436955928924e-06, + "loss": 0.4362, + "step": 3763 + }, + { + "epoch": 2.5, + "learning_rate": 5.9407014485359236e-06, + "loss": 0.4642, + "step": 3764 + }, + { + "epoch": 2.5, + "learning_rate": 5.9358603407874695e-06, + "loss": 0.4353, + "step": 3765 + }, + { + "epoch": 2.5, + "learning_rate": 5.931020373706263e-06, + "loss": 0.4557, + "step": 3766 + }, + { + "epoch": 2.5, + "learning_rate": 5.926181548650718e-06, + "loss": 0.4088, + "step": 3767 + }, + { + "epoch": 2.5, + "learning_rate": 5.921343866978935e-06, + "loss": 0.4143, + "step": 3768 + }, + { + "epoch": 2.5, + "learning_rate": 5.916507330048691e-06, + "loss": 0.4344, + "step": 3769 + }, + { + "epoch": 2.5, + "learning_rate": 5.9116719392174304e-06, + "loss": 0.4211, + "step": 3770 + }, + { + "epoch": 2.5, + "learning_rate": 5.906837695842289e-06, + "loss": 0.4212, + "step": 3771 + }, + { + "epoch": 2.5, + "learning_rate": 5.902004601280076e-06, + "loss": 0.4682, + "step": 3772 + }, + { + "epoch": 2.5, + "learning_rate": 5.897172656887278e-06, + "loss": 0.4228, + "step": 3773 + }, + { + "epoch": 2.51, + "learning_rate": 5.892341864020062e-06, + "loss": 0.4392, + "step": 3774 + }, + { + "epoch": 2.51, + "learning_rate": 5.887512224034263e-06, + "loss": 0.452, + "step": 3775 + }, + { + "epoch": 2.51, + "learning_rate": 5.882683738285404e-06, + "loss": 0.4441, + "step": 3776 + }, + { + "epoch": 2.51, + "learning_rate": 5.877856408128675e-06, + "loss": 0.442, + "step": 3777 + }, + { + "epoch": 2.51, + "learning_rate": 5.873030234918948e-06, + "loss": 0.4472, + "step": 3778 + }, + { + "epoch": 2.51, + "learning_rate": 5.868205220010766e-06, + "loss": 0.4814, + "step": 3779 + }, + { + "epoch": 2.51, + "learning_rate": 5.8633813647583505e-06, + "loss": 0.4582, + "step": 3780 + }, + { + "epoch": 2.51, + "learning_rate": 5.858558670515591e-06, + "loss": 0.447, + "step": 3781 + }, + { + "epoch": 2.51, + "learning_rate": 5.853737138636058e-06, + "loss": 0.4183, + "step": 3782 + }, + { + "epoch": 2.51, + "learning_rate": 5.848916770472992e-06, + "loss": 0.4319, + "step": 3783 + }, + { + "epoch": 2.51, + "learning_rate": 5.84409756737931e-06, + "loss": 0.4229, + "step": 3784 + }, + { + "epoch": 2.51, + "learning_rate": 5.8392795307076e-06, + "loss": 0.4262, + "step": 3785 + }, + { + "epoch": 2.51, + "learning_rate": 5.834462661810123e-06, + "loss": 0.4874, + "step": 3786 + }, + { + "epoch": 2.51, + "learning_rate": 5.829646962038812e-06, + "loss": 0.4452, + "step": 3787 + }, + { + "epoch": 2.51, + "learning_rate": 5.824832432745267e-06, + "loss": 0.4797, + "step": 3788 + }, + { + "epoch": 2.52, + "learning_rate": 5.820019075280772e-06, + "loss": 0.4496, + "step": 3789 + }, + { + "epoch": 2.52, + "learning_rate": 5.815206890996267e-06, + "loss": 0.4279, + "step": 3790 + }, + { + "epoch": 2.52, + "learning_rate": 5.810395881242378e-06, + "loss": 0.4347, + "step": 3791 + }, + { + "epoch": 2.52, + "learning_rate": 5.805586047369389e-06, + "loss": 0.4187, + "step": 3792 + }, + { + "epoch": 2.52, + "learning_rate": 5.8007773907272565e-06, + "loss": 0.4372, + "step": 3793 + }, + { + "epoch": 2.52, + "learning_rate": 5.795969912665615e-06, + "loss": 0.3969, + "step": 3794 + }, + { + "epoch": 2.52, + "learning_rate": 5.791163614533753e-06, + "loss": 0.4203, + "step": 3795 + }, + { + "epoch": 2.52, + "learning_rate": 5.786358497680647e-06, + "loss": 0.4339, + "step": 3796 + }, + { + "epoch": 2.52, + "learning_rate": 5.7815545634549205e-06, + "loss": 0.4204, + "step": 3797 + }, + { + "epoch": 2.52, + "learning_rate": 5.776751813204887e-06, + "loss": 0.4482, + "step": 3798 + }, + { + "epoch": 2.52, + "learning_rate": 5.771950248278513e-06, + "loss": 0.4435, + "step": 3799 + }, + { + "epoch": 2.52, + "learning_rate": 5.76714987002343e-06, + "loss": 0.4183, + "step": 3800 + }, + { + "epoch": 2.52, + "learning_rate": 5.7623506797869525e-06, + "loss": 0.4705, + "step": 3801 + }, + { + "epoch": 2.52, + "learning_rate": 5.757552678916042e-06, + "loss": 0.4252, + "step": 3802 + }, + { + "epoch": 2.53, + "learning_rate": 5.752755868757345e-06, + "loss": 0.4253, + "step": 3803 + }, + { + "epoch": 2.53, + "learning_rate": 5.7479602506571615e-06, + "loss": 0.4645, + "step": 3804 + }, + { + "epoch": 2.53, + "learning_rate": 5.743165825961454e-06, + "loss": 0.4407, + "step": 3805 + }, + { + "epoch": 2.53, + "learning_rate": 5.738372596015867e-06, + "loss": 0.4389, + "step": 3806 + }, + { + "epoch": 2.53, + "learning_rate": 5.733580562165688e-06, + "loss": 0.4458, + "step": 3807 + }, + { + "epoch": 2.53, + "learning_rate": 5.72878972575589e-06, + "loss": 0.4327, + "step": 3808 + }, + { + "epoch": 2.53, + "learning_rate": 5.7240000881310945e-06, + "loss": 0.4501, + "step": 3809 + }, + { + "epoch": 2.53, + "learning_rate": 5.719211650635586e-06, + "loss": 0.4477, + "step": 3810 + }, + { + "epoch": 2.53, + "learning_rate": 5.714424414613329e-06, + "loss": 0.4432, + "step": 3811 + }, + { + "epoch": 2.53, + "learning_rate": 5.709638381407929e-06, + "loss": 0.4247, + "step": 3812 + }, + { + "epoch": 2.53, + "learning_rate": 5.704853552362674e-06, + "loss": 0.4022, + "step": 3813 + }, + { + "epoch": 2.53, + "learning_rate": 5.700069928820496e-06, + "loss": 0.4089, + "step": 3814 + }, + { + "epoch": 2.53, + "learning_rate": 5.695287512124011e-06, + "loss": 0.464, + "step": 3815 + }, + { + "epoch": 2.53, + "learning_rate": 5.690506303615461e-06, + "loss": 0.4569, + "step": 3816 + }, + { + "epoch": 2.53, + "learning_rate": 5.685726304636787e-06, + "loss": 0.4443, + "step": 3817 + }, + { + "epoch": 2.54, + "learning_rate": 5.680947516529566e-06, + "loss": 0.4363, + "step": 3818 + }, + { + "epoch": 2.54, + "learning_rate": 5.676169940635045e-06, + "loss": 0.41, + "step": 3819 + }, + { + "epoch": 2.54, + "learning_rate": 5.671393578294133e-06, + "loss": 0.449, + "step": 3820 + }, + { + "epoch": 2.54, + "learning_rate": 5.6666184308473915e-06, + "loss": 0.455, + "step": 3821 + }, + { + "epoch": 2.54, + "learning_rate": 5.661844499635043e-06, + "loss": 0.4161, + "step": 3822 + }, + { + "epoch": 2.54, + "learning_rate": 5.657071785996966e-06, + "loss": 0.4271, + "step": 3823 + }, + { + "epoch": 2.54, + "learning_rate": 5.652300291272707e-06, + "loss": 0.4935, + "step": 3824 + }, + { + "epoch": 2.54, + "learning_rate": 5.647530016801457e-06, + "loss": 0.4375, + "step": 3825 + }, + { + "epoch": 2.54, + "learning_rate": 5.6427609639220804e-06, + "loss": 0.4461, + "step": 3826 + }, + { + "epoch": 2.54, + "learning_rate": 5.637993133973083e-06, + "loss": 0.4122, + "step": 3827 + }, + { + "epoch": 2.54, + "learning_rate": 5.633226528292635e-06, + "loss": 0.4563, + "step": 3828 + }, + { + "epoch": 2.54, + "learning_rate": 5.628461148218565e-06, + "loss": 0.4445, + "step": 3829 + }, + { + "epoch": 2.54, + "learning_rate": 5.62369699508835e-06, + "loss": 0.4412, + "step": 3830 + }, + { + "epoch": 2.54, + "learning_rate": 5.618934070239135e-06, + "loss": 0.4524, + "step": 3831 + }, + { + "epoch": 2.54, + "learning_rate": 5.6141723750077095e-06, + "loss": 0.4851, + "step": 3832 + }, + { + "epoch": 2.55, + "learning_rate": 5.6094119107305145e-06, + "loss": 0.4015, + "step": 3833 + }, + { + "epoch": 2.55, + "learning_rate": 5.604652678743663e-06, + "loss": 0.4392, + "step": 3834 + }, + { + "epoch": 2.55, + "learning_rate": 5.5998946803829e-06, + "loss": 0.444, + "step": 3835 + }, + { + "epoch": 2.55, + "learning_rate": 5.595137916983647e-06, + "loss": 0.4508, + "step": 3836 + }, + { + "epoch": 2.55, + "learning_rate": 5.5903823898809586e-06, + "loss": 0.4389, + "step": 3837 + }, + { + "epoch": 2.55, + "learning_rate": 5.585628100409557e-06, + "loss": 0.3999, + "step": 3838 + }, + { + "epoch": 2.55, + "learning_rate": 5.580875049903811e-06, + "loss": 0.4794, + "step": 3839 + }, + { + "epoch": 2.55, + "learning_rate": 5.576123239697735e-06, + "loss": 0.4045, + "step": 3840 + }, + { + "epoch": 2.55, + "learning_rate": 5.57137267112501e-06, + "loss": 0.4505, + "step": 3841 + }, + { + "epoch": 2.55, + "learning_rate": 5.5666233455189556e-06, + "loss": 0.4002, + "step": 3842 + }, + { + "epoch": 2.55, + "learning_rate": 5.561875264212553e-06, + "loss": 0.4334, + "step": 3843 + }, + { + "epoch": 2.55, + "learning_rate": 5.557128428538425e-06, + "loss": 0.4159, + "step": 3844 + }, + { + "epoch": 2.55, + "learning_rate": 5.552382839828847e-06, + "loss": 0.4338, + "step": 3845 + }, + { + "epoch": 2.55, + "learning_rate": 5.547638499415751e-06, + "loss": 0.459, + "step": 3846 + }, + { + "epoch": 2.55, + "learning_rate": 5.5428954086307085e-06, + "loss": 0.4299, + "step": 3847 + }, + { + "epoch": 2.56, + "learning_rate": 5.538153568804951e-06, + "loss": 0.4158, + "step": 3848 + }, + { + "epoch": 2.56, + "learning_rate": 5.53341298126935e-06, + "loss": 0.4511, + "step": 3849 + }, + { + "epoch": 2.56, + "learning_rate": 5.528673647354432e-06, + "loss": 0.4259, + "step": 3850 + }, + { + "epoch": 2.56, + "learning_rate": 5.52393556839036e-06, + "loss": 0.4301, + "step": 3851 + }, + { + "epoch": 2.56, + "learning_rate": 5.51919874570696e-06, + "loss": 0.4522, + "step": 3852 + }, + { + "epoch": 2.56, + "learning_rate": 5.514463180633702e-06, + "loss": 0.4308, + "step": 3853 + }, + { + "epoch": 2.56, + "learning_rate": 5.509728874499692e-06, + "loss": 0.4333, + "step": 3854 + }, + { + "epoch": 2.56, + "learning_rate": 5.504995828633704e-06, + "loss": 0.4707, + "step": 3855 + }, + { + "epoch": 2.56, + "learning_rate": 5.500264044364124e-06, + "loss": 0.4298, + "step": 3856 + }, + { + "epoch": 2.56, + "learning_rate": 5.495533523019021e-06, + "loss": 0.4242, + "step": 3857 + }, + { + "epoch": 2.56, + "learning_rate": 5.490804265926084e-06, + "loss": 0.4345, + "step": 3858 + }, + { + "epoch": 2.56, + "learning_rate": 5.486076274412664e-06, + "loss": 0.4507, + "step": 3859 + }, + { + "epoch": 2.56, + "learning_rate": 5.481349549805741e-06, + "loss": 0.4404, + "step": 3860 + }, + { + "epoch": 2.56, + "learning_rate": 5.476624093431956e-06, + "loss": 0.4434, + "step": 3861 + }, + { + "epoch": 2.56, + "learning_rate": 5.471899906617581e-06, + "loss": 0.4121, + "step": 3862 + }, + { + "epoch": 2.57, + "learning_rate": 5.467176990688534e-06, + "loss": 0.4289, + "step": 3863 + }, + { + "epoch": 2.57, + "learning_rate": 5.462455346970383e-06, + "loss": 0.433, + "step": 3864 + }, + { + "epoch": 2.57, + "learning_rate": 5.457734976788331e-06, + "loss": 0.4319, + "step": 3865 + }, + { + "epoch": 2.57, + "learning_rate": 5.4530158814672315e-06, + "loss": 0.448, + "step": 3866 + }, + { + "epoch": 2.57, + "learning_rate": 5.448298062331574e-06, + "loss": 0.4342, + "step": 3867 + }, + { + "epoch": 2.57, + "learning_rate": 5.443581520705485e-06, + "loss": 0.4472, + "step": 3868 + }, + { + "epoch": 2.57, + "learning_rate": 5.438866257912751e-06, + "loss": 0.4287, + "step": 3869 + }, + { + "epoch": 2.57, + "learning_rate": 5.434152275276776e-06, + "loss": 0.4306, + "step": 3870 + }, + { + "epoch": 2.57, + "learning_rate": 5.429439574120627e-06, + "loss": 0.4117, + "step": 3871 + }, + { + "epoch": 2.57, + "learning_rate": 5.424728155766994e-06, + "loss": 0.4475, + "step": 3872 + }, + { + "epoch": 2.57, + "learning_rate": 5.420018021538211e-06, + "loss": 0.4138, + "step": 3873 + }, + { + "epoch": 2.57, + "learning_rate": 5.415309172756263e-06, + "loss": 0.4391, + "step": 3874 + }, + { + "epoch": 2.57, + "learning_rate": 5.410601610742754e-06, + "loss": 0.4368, + "step": 3875 + }, + { + "epoch": 2.57, + "learning_rate": 5.4058953368189515e-06, + "loss": 0.4226, + "step": 3876 + }, + { + "epoch": 2.57, + "learning_rate": 5.401190352305736e-06, + "loss": 0.4685, + "step": 3877 + }, + { + "epoch": 2.58, + "learning_rate": 5.396486658523647e-06, + "loss": 0.4473, + "step": 3878 + }, + { + "epoch": 2.58, + "learning_rate": 5.391784256792851e-06, + "loss": 0.437, + "step": 3879 + }, + { + "epoch": 2.58, + "learning_rate": 5.38708314843315e-06, + "loss": 0.415, + "step": 3880 + }, + { + "epoch": 2.58, + "learning_rate": 5.382383334763992e-06, + "loss": 0.4128, + "step": 3881 + }, + { + "epoch": 2.58, + "learning_rate": 5.377684817104451e-06, + "loss": 0.4946, + "step": 3882 + }, + { + "epoch": 2.58, + "learning_rate": 5.372987596773249e-06, + "loss": 0.4345, + "step": 3883 + }, + { + "epoch": 2.58, + "learning_rate": 5.368291675088736e-06, + "loss": 0.457, + "step": 3884 + }, + { + "epoch": 2.58, + "learning_rate": 5.363597053368897e-06, + "loss": 0.4408, + "step": 3885 + }, + { + "epoch": 2.58, + "learning_rate": 5.3589037329313534e-06, + "loss": 0.453, + "step": 3886 + }, + { + "epoch": 2.58, + "learning_rate": 5.354211715093361e-06, + "loss": 0.4598, + "step": 3887 + }, + { + "epoch": 2.58, + "learning_rate": 5.34952100117182e-06, + "loss": 0.4323, + "step": 3888 + }, + { + "epoch": 2.58, + "learning_rate": 5.344831592483249e-06, + "loss": 0.4242, + "step": 3889 + }, + { + "epoch": 2.58, + "learning_rate": 5.340143490343813e-06, + "loss": 0.4336, + "step": 3890 + }, + { + "epoch": 2.58, + "learning_rate": 5.3354566960692945e-06, + "loss": 0.4291, + "step": 3891 + }, + { + "epoch": 2.58, + "learning_rate": 5.3307712109751274e-06, + "loss": 0.4126, + "step": 3892 + }, + { + "epoch": 2.59, + "learning_rate": 5.3260870363763635e-06, + "loss": 0.4418, + "step": 3893 + }, + { + "epoch": 2.59, + "learning_rate": 5.321404173587696e-06, + "loss": 0.4345, + "step": 3894 + }, + { + "epoch": 2.59, + "learning_rate": 5.316722623923454e-06, + "loss": 0.4443, + "step": 3895 + }, + { + "epoch": 2.59, + "learning_rate": 5.312042388697582e-06, + "loss": 0.4251, + "step": 3896 + }, + { + "epoch": 2.59, + "learning_rate": 5.307363469223667e-06, + "loss": 0.4331, + "step": 3897 + }, + { + "epoch": 2.59, + "learning_rate": 5.3026858668149205e-06, + "loss": 0.4175, + "step": 3898 + }, + { + "epoch": 2.59, + "learning_rate": 5.298009582784196e-06, + "loss": 0.4254, + "step": 3899 + }, + { + "epoch": 2.59, + "learning_rate": 5.293334618443962e-06, + "loss": 0.4211, + "step": 3900 + }, + { + "epoch": 2.59, + "learning_rate": 5.2886609751063275e-06, + "loss": 0.4537, + "step": 3901 + }, + { + "epoch": 2.59, + "learning_rate": 5.283988654083029e-06, + "loss": 0.4242, + "step": 3902 + }, + { + "epoch": 2.59, + "learning_rate": 5.279317656685421e-06, + "loss": 0.3906, + "step": 3903 + }, + { + "epoch": 2.59, + "learning_rate": 5.274647984224506e-06, + "loss": 0.4283, + "step": 3904 + }, + { + "epoch": 2.59, + "learning_rate": 5.269979638010893e-06, + "loss": 0.4255, + "step": 3905 + }, + { + "epoch": 2.59, + "learning_rate": 5.265312619354843e-06, + "loss": 0.4389, + "step": 3906 + }, + { + "epoch": 2.6, + "learning_rate": 5.2606469295662224e-06, + "loss": 0.4016, + "step": 3907 + }, + { + "epoch": 2.6, + "learning_rate": 5.25598256995453e-06, + "loss": 0.4627, + "step": 3908 + }, + { + "epoch": 2.6, + "learning_rate": 5.2513195418289035e-06, + "loss": 0.416, + "step": 3909 + }, + { + "epoch": 2.6, + "learning_rate": 5.24665784649809e-06, + "loss": 0.4259, + "step": 3910 + }, + { + "epoch": 2.6, + "learning_rate": 5.241997485270478e-06, + "loss": 0.4351, + "step": 3911 + }, + { + "epoch": 2.6, + "learning_rate": 5.237338459454067e-06, + "loss": 0.4337, + "step": 3912 + }, + { + "epoch": 2.6, + "learning_rate": 5.232680770356495e-06, + "loss": 0.4478, + "step": 3913 + }, + { + "epoch": 2.6, + "learning_rate": 5.2280244192850185e-06, + "loss": 0.4425, + "step": 3914 + }, + { + "epoch": 2.6, + "learning_rate": 5.223369407546509e-06, + "loss": 0.4473, + "step": 3915 + }, + { + "epoch": 2.6, + "learning_rate": 5.218715736447484e-06, + "loss": 0.4829, + "step": 3916 + }, + { + "epoch": 2.6, + "learning_rate": 5.2140634072940624e-06, + "loss": 0.4156, + "step": 3917 + }, + { + "epoch": 2.6, + "learning_rate": 5.209412421392005e-06, + "loss": 0.423, + "step": 3918 + }, + { + "epoch": 2.6, + "learning_rate": 5.204762780046682e-06, + "loss": 0.4634, + "step": 3919 + }, + { + "epoch": 2.6, + "learning_rate": 5.2001144845630906e-06, + "loss": 0.4425, + "step": 3920 + }, + { + "epoch": 2.6, + "learning_rate": 5.195467536245855e-06, + "loss": 0.4434, + "step": 3921 + }, + { + "epoch": 2.61, + "learning_rate": 5.19082193639921e-06, + "loss": 0.4177, + "step": 3922 + }, + { + "epoch": 2.61, + "learning_rate": 5.186177686327029e-06, + "loss": 0.4291, + "step": 3923 + }, + { + "epoch": 2.61, + "learning_rate": 5.181534787332791e-06, + "loss": 0.4606, + "step": 3924 + }, + { + "epoch": 2.61, + "learning_rate": 5.176893240719602e-06, + "loss": 0.3969, + "step": 3925 + }, + { + "epoch": 2.61, + "learning_rate": 5.172253047790187e-06, + "loss": 0.4138, + "step": 3926 + }, + { + "epoch": 2.61, + "learning_rate": 5.16761420984689e-06, + "loss": 0.4162, + "step": 3927 + }, + { + "epoch": 2.61, + "learning_rate": 5.162976728191685e-06, + "loss": 0.4464, + "step": 3928 + }, + { + "epoch": 2.61, + "learning_rate": 5.158340604126148e-06, + "loss": 0.4454, + "step": 3929 + }, + { + "epoch": 2.61, + "learning_rate": 5.153705838951495e-06, + "loss": 0.4015, + "step": 3930 + }, + { + "epoch": 2.61, + "learning_rate": 5.149072433968533e-06, + "loss": 0.4529, + "step": 3931 + }, + { + "epoch": 2.61, + "learning_rate": 5.144440390477714e-06, + "loss": 0.4596, + "step": 3932 + }, + { + "epoch": 2.61, + "learning_rate": 5.139809709779089e-06, + "loss": 0.4108, + "step": 3933 + }, + { + "epoch": 2.61, + "learning_rate": 5.135180393172343e-06, + "loss": 0.4344, + "step": 3934 + }, + { + "epoch": 2.61, + "learning_rate": 5.1305524419567595e-06, + "loss": 0.4245, + "step": 3935 + }, + { + "epoch": 2.61, + "learning_rate": 5.12592585743126e-06, + "loss": 0.41, + "step": 3936 + }, + { + "epoch": 2.62, + "learning_rate": 5.1213006408943645e-06, + "loss": 0.4678, + "step": 3937 + }, + { + "epoch": 2.62, + "learning_rate": 5.116676793644212e-06, + "loss": 0.4478, + "step": 3938 + }, + { + "epoch": 2.62, + "learning_rate": 5.11205431697857e-06, + "loss": 0.4218, + "step": 3939 + }, + { + "epoch": 2.62, + "learning_rate": 5.107433212194801e-06, + "loss": 0.4355, + "step": 3940 + }, + { + "epoch": 2.62, + "learning_rate": 5.102813480589905e-06, + "loss": 0.4411, + "step": 3941 + }, + { + "epoch": 2.62, + "learning_rate": 5.098195123460481e-06, + "loss": 0.4242, + "step": 3942 + }, + { + "epoch": 2.62, + "learning_rate": 5.093578142102742e-06, + "loss": 0.4505, + "step": 3943 + }, + { + "epoch": 2.62, + "learning_rate": 5.0889625378125255e-06, + "loss": 0.4334, + "step": 3944 + }, + { + "epoch": 2.62, + "learning_rate": 5.08434831188527e-06, + "loss": 0.4675, + "step": 3945 + }, + { + "epoch": 2.62, + "learning_rate": 5.079735465616041e-06, + "loss": 0.4508, + "step": 3946 + }, + { + "epoch": 2.62, + "learning_rate": 5.075124000299506e-06, + "loss": 0.4567, + "step": 3947 + }, + { + "epoch": 2.62, + "learning_rate": 5.070513917229944e-06, + "loss": 0.4502, + "step": 3948 + }, + { + "epoch": 2.62, + "learning_rate": 5.065905217701257e-06, + "loss": 0.3799, + "step": 3949 + }, + { + "epoch": 2.62, + "learning_rate": 5.061297903006943e-06, + "loss": 0.4242, + "step": 3950 + }, + { + "epoch": 2.62, + "learning_rate": 5.056691974440132e-06, + "loss": 0.4438, + "step": 3951 + }, + { + "epoch": 2.63, + "learning_rate": 5.052087433293542e-06, + "loss": 0.4284, + "step": 3952 + }, + { + "epoch": 2.63, + "learning_rate": 5.0474842808595226e-06, + "loss": 0.4535, + "step": 3953 + }, + { + "epoch": 2.63, + "learning_rate": 5.042882518430018e-06, + "loss": 0.4418, + "step": 3954 + }, + { + "epoch": 2.63, + "learning_rate": 5.038282147296585e-06, + "loss": 0.4433, + "step": 3955 + }, + { + "epoch": 2.63, + "learning_rate": 5.033683168750401e-06, + "loss": 0.4453, + "step": 3956 + }, + { + "epoch": 2.63, + "learning_rate": 5.029085584082238e-06, + "loss": 0.4176, + "step": 3957 + }, + { + "epoch": 2.63, + "learning_rate": 5.024489394582488e-06, + "loss": 0.4295, + "step": 3958 + }, + { + "epoch": 2.63, + "learning_rate": 5.019894601541144e-06, + "loss": 0.4894, + "step": 3959 + }, + { + "epoch": 2.63, + "learning_rate": 5.015301206247813e-06, + "loss": 0.4646, + "step": 3960 + }, + { + "epoch": 2.63, + "learning_rate": 5.010709209991698e-06, + "loss": 0.4669, + "step": 3961 + }, + { + "epoch": 2.63, + "learning_rate": 5.0061186140616235e-06, + "loss": 0.4535, + "step": 3962 + }, + { + "epoch": 2.63, + "learning_rate": 5.0015294197460204e-06, + "loss": 0.4321, + "step": 3963 + }, + { + "epoch": 2.63, + "learning_rate": 4.996941628332912e-06, + "loss": 0.4394, + "step": 3964 + }, + { + "epoch": 2.63, + "learning_rate": 4.992355241109949e-06, + "loss": 0.4583, + "step": 3965 + }, + { + "epoch": 2.63, + "learning_rate": 4.987770259364359e-06, + "loss": 0.4509, + "step": 3966 + }, + { + "epoch": 2.64, + "learning_rate": 4.983186684383007e-06, + "loss": 0.3877, + "step": 3967 + }, + { + "epoch": 2.64, + "learning_rate": 4.978604517452338e-06, + "loss": 0.3827, + "step": 3968 + }, + { + "epoch": 2.64, + "learning_rate": 4.974023759858416e-06, + "loss": 0.4354, + "step": 3969 + }, + { + "epoch": 2.64, + "learning_rate": 4.96944441288691e-06, + "loss": 0.427, + "step": 3970 + }, + { + "epoch": 2.64, + "learning_rate": 4.9648664778230856e-06, + "loss": 0.4127, + "step": 3971 + }, + { + "epoch": 2.64, + "learning_rate": 4.960289955951813e-06, + "loss": 0.4374, + "step": 3972 + }, + { + "epoch": 2.64, + "learning_rate": 4.9557148485575665e-06, + "loss": 0.4438, + "step": 3973 + }, + { + "epoch": 2.64, + "learning_rate": 4.951141156924432e-06, + "loss": 0.4572, + "step": 3974 + }, + { + "epoch": 2.64, + "learning_rate": 4.94656888233608e-06, + "loss": 0.4247, + "step": 3975 + }, + { + "epoch": 2.64, + "learning_rate": 4.941998026075806e-06, + "loss": 0.416, + "step": 3976 + }, + { + "epoch": 2.64, + "learning_rate": 4.937428589426489e-06, + "loss": 0.4632, + "step": 3977 + }, + { + "epoch": 2.64, + "learning_rate": 4.932860573670614e-06, + "loss": 0.4342, + "step": 3978 + }, + { + "epoch": 2.64, + "learning_rate": 4.9282939800902764e-06, + "loss": 0.4363, + "step": 3979 + }, + { + "epoch": 2.64, + "learning_rate": 4.923728809967156e-06, + "loss": 0.4296, + "step": 3980 + }, + { + "epoch": 2.64, + "learning_rate": 4.919165064582553e-06, + "loss": 0.4325, + "step": 3981 + }, + { + "epoch": 2.65, + "learning_rate": 4.914602745217352e-06, + "loss": 0.4414, + "step": 3982 + }, + { + "epoch": 2.65, + "learning_rate": 4.910041853152038e-06, + "loss": 0.4372, + "step": 3983 + }, + { + "epoch": 2.65, + "learning_rate": 4.905482389666708e-06, + "loss": 0.4409, + "step": 3984 + }, + { + "epoch": 2.65, + "learning_rate": 4.900924356041044e-06, + "loss": 0.4433, + "step": 3985 + }, + { + "epoch": 2.65, + "learning_rate": 4.896367753554336e-06, + "loss": 0.4482, + "step": 3986 + }, + { + "epoch": 2.65, + "learning_rate": 4.891812583485467e-06, + "loss": 0.4383, + "step": 3987 + }, + { + "epoch": 2.65, + "learning_rate": 4.887258847112923e-06, + "loss": 0.4165, + "step": 3988 + }, + { + "epoch": 2.65, + "learning_rate": 4.882706545714783e-06, + "loss": 0.4243, + "step": 3989 + }, + { + "epoch": 2.65, + "learning_rate": 4.878155680568721e-06, + "loss": 0.4543, + "step": 3990 + }, + { + "epoch": 2.65, + "learning_rate": 4.87360625295202e-06, + "loss": 0.4425, + "step": 3991 + }, + { + "epoch": 2.65, + "learning_rate": 4.869058264141541e-06, + "loss": 0.4241, + "step": 3992 + }, + { + "epoch": 2.65, + "learning_rate": 4.864511715413761e-06, + "loss": 0.4387, + "step": 3993 + }, + { + "epoch": 2.65, + "learning_rate": 4.8599666080447395e-06, + "loss": 0.4028, + "step": 3994 + }, + { + "epoch": 2.65, + "learning_rate": 4.855422943310129e-06, + "loss": 0.4479, + "step": 3995 + }, + { + "epoch": 2.65, + "learning_rate": 4.850880722485195e-06, + "loss": 0.4545, + "step": 3996 + }, + { + "epoch": 2.66, + "learning_rate": 4.846339946844776e-06, + "loss": 0.4222, + "step": 3997 + }, + { + "epoch": 2.66, + "learning_rate": 4.841800617663322e-06, + "loss": 0.4789, + "step": 3998 + }, + { + "epoch": 2.66, + "learning_rate": 4.8372627362148675e-06, + "loss": 0.4321, + "step": 3999 + }, + { + "epoch": 2.66, + "learning_rate": 4.832726303773042e-06, + "loss": 0.4266, + "step": 4000 + }, + { + "epoch": 2.66, + "learning_rate": 4.8281913216110665e-06, + "loss": 0.4097, + "step": 4001 + }, + { + "epoch": 2.66, + "learning_rate": 4.823657791001766e-06, + "loss": 0.4569, + "step": 4002 + }, + { + "epoch": 2.66, + "learning_rate": 4.819125713217541e-06, + "loss": 0.4252, + "step": 4003 + }, + { + "epoch": 2.66, + "learning_rate": 4.814595089530396e-06, + "loss": 0.4612, + "step": 4004 + }, + { + "epoch": 2.66, + "learning_rate": 4.810065921211936e-06, + "loss": 0.4303, + "step": 4005 + }, + { + "epoch": 2.66, + "learning_rate": 4.805538209533328e-06, + "loss": 0.4585, + "step": 4006 + }, + { + "epoch": 2.66, + "learning_rate": 4.801011955765361e-06, + "loss": 0.4431, + "step": 4007 + }, + { + "epoch": 2.66, + "learning_rate": 4.796487161178394e-06, + "loss": 0.4408, + "step": 4008 + }, + { + "epoch": 2.66, + "learning_rate": 4.791963827042392e-06, + "loss": 0.4571, + "step": 4009 + }, + { + "epoch": 2.66, + "learning_rate": 4.787441954626895e-06, + "loss": 0.435, + "step": 4010 + }, + { + "epoch": 2.67, + "learning_rate": 4.782921545201049e-06, + "loss": 0.4716, + "step": 4011 + }, + { + "epoch": 2.67, + "learning_rate": 4.7784026000335755e-06, + "loss": 0.4543, + "step": 4012 + }, + { + "epoch": 2.67, + "learning_rate": 4.773885120392788e-06, + "loss": 0.4447, + "step": 4013 + }, + { + "epoch": 2.67, + "learning_rate": 4.769369107546598e-06, + "loss": 0.4332, + "step": 4014 + }, + { + "epoch": 2.67, + "learning_rate": 4.764854562762491e-06, + "loss": 0.4549, + "step": 4015 + }, + { + "epoch": 2.67, + "learning_rate": 4.7603414873075545e-06, + "loss": 0.4357, + "step": 4016 + }, + { + "epoch": 2.67, + "learning_rate": 4.755829882448455e-06, + "loss": 0.423, + "step": 4017 + }, + { + "epoch": 2.67, + "learning_rate": 4.751319749451443e-06, + "loss": 0.4224, + "step": 4018 + }, + { + "epoch": 2.67, + "learning_rate": 4.746811089582368e-06, + "loss": 0.3962, + "step": 4019 + }, + { + "epoch": 2.67, + "learning_rate": 4.742303904106653e-06, + "loss": 0.4106, + "step": 4020 + }, + { + "epoch": 2.67, + "learning_rate": 4.73779819428932e-06, + "loss": 0.4429, + "step": 4021 + }, + { + "epoch": 2.67, + "learning_rate": 4.733293961394967e-06, + "loss": 0.4902, + "step": 4022 + }, + { + "epoch": 2.67, + "learning_rate": 4.728791206687776e-06, + "loss": 0.467, + "step": 4023 + }, + { + "epoch": 2.67, + "learning_rate": 4.7242899314315275e-06, + "loss": 0.4164, + "step": 4024 + }, + { + "epoch": 2.67, + "learning_rate": 4.719790136889569e-06, + "loss": 0.4542, + "step": 4025 + }, + { + "epoch": 2.68, + "learning_rate": 4.71529182432485e-06, + "loss": 0.403, + "step": 4026 + }, + { + "epoch": 2.68, + "learning_rate": 4.710794994999888e-06, + "loss": 0.4358, + "step": 4027 + }, + { + "epoch": 2.68, + "learning_rate": 4.706299650176802e-06, + "loss": 0.4522, + "step": 4028 + }, + { + "epoch": 2.68, + "learning_rate": 4.701805791117269e-06, + "loss": 0.4088, + "step": 4029 + }, + { + "epoch": 2.68, + "learning_rate": 4.697313419082573e-06, + "loss": 0.451, + "step": 4030 + }, + { + "epoch": 2.68, + "learning_rate": 4.692822535333575e-06, + "loss": 0.4132, + "step": 4031 + }, + { + "epoch": 2.68, + "learning_rate": 4.688333141130705e-06, + "loss": 0.4412, + "step": 4032 + }, + { + "epoch": 2.68, + "learning_rate": 4.683845237733996e-06, + "loss": 0.457, + "step": 4033 + }, + { + "epoch": 2.68, + "learning_rate": 4.679358826403045e-06, + "loss": 0.431, + "step": 4034 + }, + { + "epoch": 2.68, + "learning_rate": 4.674873908397039e-06, + "loss": 0.4441, + "step": 4035 + }, + { + "epoch": 2.68, + "learning_rate": 4.6703904849747376e-06, + "loss": 0.4424, + "step": 4036 + }, + { + "epoch": 2.68, + "learning_rate": 4.665908557394492e-06, + "loss": 0.4337, + "step": 4037 + }, + { + "epoch": 2.68, + "learning_rate": 4.661428126914233e-06, + "loss": 0.4703, + "step": 4038 + }, + { + "epoch": 2.68, + "learning_rate": 4.6569491947914555e-06, + "loss": 0.4479, + "step": 4039 + }, + { + "epoch": 2.68, + "learning_rate": 4.65247176228326e-06, + "loss": 0.4456, + "step": 4040 + }, + { + "epoch": 2.69, + "learning_rate": 4.647995830646296e-06, + "loss": 0.5017, + "step": 4041 + }, + { + "epoch": 2.69, + "learning_rate": 4.643521401136816e-06, + "loss": 0.4032, + "step": 4042 + }, + { + "epoch": 2.69, + "learning_rate": 4.639048475010636e-06, + "loss": 0.4391, + "step": 4043 + }, + { + "epoch": 2.69, + "learning_rate": 4.634577053523162e-06, + "loss": 0.4689, + "step": 4044 + }, + { + "epoch": 2.69, + "learning_rate": 4.630107137929365e-06, + "loss": 0.4273, + "step": 4045 + }, + { + "epoch": 2.69, + "learning_rate": 4.625638729483808e-06, + "loss": 0.4457, + "step": 4046 + }, + { + "epoch": 2.69, + "learning_rate": 4.621171829440617e-06, + "loss": 0.4647, + "step": 4047 + }, + { + "epoch": 2.69, + "learning_rate": 4.6167064390535e-06, + "loss": 0.4494, + "step": 4048 + }, + { + "epoch": 2.69, + "learning_rate": 4.612242559575746e-06, + "loss": 0.4335, + "step": 4049 + }, + { + "epoch": 2.69, + "learning_rate": 4.6077801922602105e-06, + "loss": 0.4495, + "step": 4050 + }, + { + "epoch": 2.69, + "learning_rate": 4.603319338359335e-06, + "loss": 0.459, + "step": 4051 + }, + { + "epoch": 2.69, + "learning_rate": 4.598859999125132e-06, + "loss": 0.449, + "step": 4052 + }, + { + "epoch": 2.69, + "learning_rate": 4.594402175809179e-06, + "loss": 0.489, + "step": 4053 + }, + { + "epoch": 2.69, + "learning_rate": 4.589945869662647e-06, + "loss": 0.4142, + "step": 4054 + }, + { + "epoch": 2.69, + "learning_rate": 4.585491081936263e-06, + "loss": 0.4366, + "step": 4055 + }, + { + "epoch": 2.7, + "learning_rate": 4.581037813880344e-06, + "loss": 0.4595, + "step": 4056 + }, + { + "epoch": 2.7, + "learning_rate": 4.5765860667447685e-06, + "loss": 0.4137, + "step": 4057 + }, + { + "epoch": 2.7, + "learning_rate": 4.572135841778989e-06, + "loss": 0.4234, + "step": 4058 + }, + { + "epoch": 2.7, + "learning_rate": 4.56768714023204e-06, + "loss": 0.4251, + "step": 4059 + }, + { + "epoch": 2.7, + "learning_rate": 4.563239963352517e-06, + "loss": 0.441, + "step": 4060 + }, + { + "epoch": 2.7, + "learning_rate": 4.558794312388598e-06, + "loss": 0.4508, + "step": 4061 + }, + { + "epoch": 2.7, + "learning_rate": 4.554350188588021e-06, + "loss": 0.4518, + "step": 4062 + }, + { + "epoch": 2.7, + "learning_rate": 4.549907593198111e-06, + "loss": 0.445, + "step": 4063 + }, + { + "epoch": 2.7, + "learning_rate": 4.54546652746575e-06, + "loss": 0.4511, + "step": 4064 + }, + { + "epoch": 2.7, + "learning_rate": 4.5410269926373905e-06, + "loss": 0.3977, + "step": 4065 + }, + { + "epoch": 2.7, + "learning_rate": 4.536588989959071e-06, + "loss": 0.4401, + "step": 4066 + }, + { + "epoch": 2.7, + "learning_rate": 4.5321525206763805e-06, + "loss": 0.4592, + "step": 4067 + }, + { + "epoch": 2.7, + "learning_rate": 4.527717586034494e-06, + "loss": 0.4071, + "step": 4068 + }, + { + "epoch": 2.7, + "learning_rate": 4.523284187278144e-06, + "loss": 0.4383, + "step": 4069 + }, + { + "epoch": 2.7, + "learning_rate": 4.518852325651638e-06, + "loss": 0.5033, + "step": 4070 + }, + { + "epoch": 2.71, + "learning_rate": 4.514422002398846e-06, + "loss": 0.4573, + "step": 4071 + }, + { + "epoch": 2.71, + "learning_rate": 4.5099932187632146e-06, + "loss": 0.4646, + "step": 4072 + }, + { + "epoch": 2.71, + "learning_rate": 4.505565975987757e-06, + "loss": 0.437, + "step": 4073 + }, + { + "epoch": 2.71, + "learning_rate": 4.501140275315049e-06, + "loss": 0.4392, + "step": 4074 + }, + { + "epoch": 2.71, + "learning_rate": 4.496716117987234e-06, + "loss": 0.4408, + "step": 4075 + }, + { + "epoch": 2.71, + "learning_rate": 4.492293505246024e-06, + "loss": 0.4576, + "step": 4076 + }, + { + "epoch": 2.71, + "learning_rate": 4.4878724383327e-06, + "loss": 0.4386, + "step": 4077 + }, + { + "epoch": 2.71, + "learning_rate": 4.483452918488104e-06, + "loss": 0.4628, + "step": 4078 + }, + { + "epoch": 2.71, + "learning_rate": 4.479034946952646e-06, + "loss": 0.4258, + "step": 4079 + }, + { + "epoch": 2.71, + "learning_rate": 4.474618524966313e-06, + "loss": 0.439, + "step": 4080 + }, + { + "epoch": 2.71, + "learning_rate": 4.47020365376863e-06, + "loss": 0.4353, + "step": 4081 + }, + { + "epoch": 2.71, + "learning_rate": 4.465790334598712e-06, + "loss": 0.4503, + "step": 4082 + }, + { + "epoch": 2.71, + "learning_rate": 4.461378568695225e-06, + "loss": 0.435, + "step": 4083 + }, + { + "epoch": 2.71, + "learning_rate": 4.456968357296408e-06, + "loss": 0.4563, + "step": 4084 + }, + { + "epoch": 2.71, + "learning_rate": 4.452559701640053e-06, + "loss": 0.4357, + "step": 4085 + }, + { + "epoch": 2.72, + "learning_rate": 4.448152602963528e-06, + "loss": 0.4659, + "step": 4086 + }, + { + "epoch": 2.72, + "learning_rate": 4.4437470625037535e-06, + "loss": 0.4486, + "step": 4087 + }, + { + "epoch": 2.72, + "learning_rate": 4.439343081497214e-06, + "loss": 0.4612, + "step": 4088 + }, + { + "epoch": 2.72, + "learning_rate": 4.434940661179965e-06, + "loss": 0.4041, + "step": 4089 + }, + { + "epoch": 2.72, + "learning_rate": 4.43053980278761e-06, + "loss": 0.4262, + "step": 4090 + }, + { + "epoch": 2.72, + "learning_rate": 4.426140507555331e-06, + "loss": 0.4269, + "step": 4091 + }, + { + "epoch": 2.72, + "learning_rate": 4.421742776717857e-06, + "loss": 0.4871, + "step": 4092 + }, + { + "epoch": 2.72, + "learning_rate": 4.41734661150948e-06, + "loss": 0.4581, + "step": 4093 + }, + { + "epoch": 2.72, + "learning_rate": 4.412952013164062e-06, + "loss": 0.4301, + "step": 4094 + }, + { + "epoch": 2.72, + "learning_rate": 4.4085589829150125e-06, + "loss": 0.4401, + "step": 4095 + }, + { + "epoch": 2.72, + "learning_rate": 4.404167521995315e-06, + "loss": 0.4588, + "step": 4096 + }, + { + "epoch": 2.72, + "learning_rate": 4.3997776316374995e-06, + "loss": 0.4712, + "step": 4097 + }, + { + "epoch": 2.72, + "learning_rate": 4.395389313073659e-06, + "loss": 0.4159, + "step": 4098 + }, + { + "epoch": 2.72, + "learning_rate": 4.3910025675354515e-06, + "loss": 0.4163, + "step": 4099 + }, + { + "epoch": 2.73, + "learning_rate": 4.386617396254085e-06, + "loss": 0.4337, + "step": 4100 + }, + { + "epoch": 2.73, + "learning_rate": 4.3822338004603336e-06, + "loss": 0.4797, + "step": 4101 + }, + { + "epoch": 2.73, + "learning_rate": 4.37785178138452e-06, + "loss": 0.4334, + "step": 4102 + }, + { + "epoch": 2.73, + "learning_rate": 4.373471340256539e-06, + "loss": 0.4503, + "step": 4103 + }, + { + "epoch": 2.73, + "learning_rate": 4.36909247830582e-06, + "loss": 0.4302, + "step": 4104 + }, + { + "epoch": 2.73, + "learning_rate": 4.364715196761368e-06, + "loss": 0.4162, + "step": 4105 + }, + { + "epoch": 2.73, + "learning_rate": 4.360339496851742e-06, + "loss": 0.4677, + "step": 4106 + }, + { + "epoch": 2.73, + "learning_rate": 4.355965379805048e-06, + "loss": 0.446, + "step": 4107 + }, + { + "epoch": 2.73, + "learning_rate": 4.351592846848961e-06, + "loss": 0.4509, + "step": 4108 + }, + { + "epoch": 2.73, + "learning_rate": 4.347221899210698e-06, + "loss": 0.4808, + "step": 4109 + }, + { + "epoch": 2.73, + "learning_rate": 4.342852538117039e-06, + "loss": 0.4909, + "step": 4110 + }, + { + "epoch": 2.73, + "learning_rate": 4.338484764794312e-06, + "loss": 0.4291, + "step": 4111 + }, + { + "epoch": 2.73, + "learning_rate": 4.334118580468411e-06, + "loss": 0.4433, + "step": 4112 + }, + { + "epoch": 2.73, + "learning_rate": 4.32975398636477e-06, + "loss": 0.3994, + "step": 4113 + }, + { + "epoch": 2.73, + "learning_rate": 4.325390983708388e-06, + "loss": 0.4079, + "step": 4114 + }, + { + "epoch": 2.74, + "learning_rate": 4.32102957372382e-06, + "loss": 0.4606, + "step": 4115 + }, + { + "epoch": 2.74, + "learning_rate": 4.316669757635153e-06, + "loss": 0.4422, + "step": 4116 + }, + { + "epoch": 2.74, + "learning_rate": 4.31231153666605e-06, + "loss": 0.4542, + "step": 4117 + }, + { + "epoch": 2.74, + "learning_rate": 4.30795491203971e-06, + "loss": 0.4131, + "step": 4118 + }, + { + "epoch": 2.74, + "learning_rate": 4.3035998849789e-06, + "loss": 0.417, + "step": 4119 + }, + { + "epoch": 2.74, + "learning_rate": 4.299246456705921e-06, + "loss": 0.4232, + "step": 4120 + }, + { + "epoch": 2.74, + "learning_rate": 4.2948946284426405e-06, + "loss": 0.447, + "step": 4121 + }, + { + "epoch": 2.74, + "learning_rate": 4.290544401410468e-06, + "loss": 0.4466, + "step": 4122 + }, + { + "epoch": 2.74, + "learning_rate": 4.286195776830362e-06, + "loss": 0.4679, + "step": 4123 + }, + { + "epoch": 2.74, + "learning_rate": 4.281848755922842e-06, + "loss": 0.4319, + "step": 4124 + }, + { + "epoch": 2.74, + "learning_rate": 4.277503339907961e-06, + "loss": 0.4105, + "step": 4125 + }, + { + "epoch": 2.74, + "learning_rate": 4.273159530005343e-06, + "loss": 0.4393, + "step": 4126 + }, + { + "epoch": 2.74, + "learning_rate": 4.268817327434141e-06, + "loss": 0.419, + "step": 4127 + }, + { + "epoch": 2.74, + "learning_rate": 4.2644767334130656e-06, + "loss": 0.4209, + "step": 4128 + }, + { + "epoch": 2.74, + "learning_rate": 4.26013774916038e-06, + "loss": 0.4325, + "step": 4129 + }, + { + "epoch": 2.75, + "learning_rate": 4.255800375893885e-06, + "loss": 0.4335, + "step": 4130 + }, + { + "epoch": 2.75, + "learning_rate": 4.2514646148309415e-06, + "loss": 0.4273, + "step": 4131 + }, + { + "epoch": 2.75, + "learning_rate": 4.247130467188448e-06, + "loss": 0.4356, + "step": 4132 + }, + { + "epoch": 2.75, + "learning_rate": 4.242797934182853e-06, + "loss": 0.4615, + "step": 4133 + }, + { + "epoch": 2.75, + "learning_rate": 4.238467017030156e-06, + "loss": 0.4425, + "step": 4134 + }, + { + "epoch": 2.75, + "learning_rate": 4.234137716945897e-06, + "loss": 0.4223, + "step": 4135 + }, + { + "epoch": 2.75, + "learning_rate": 4.229810035145168e-06, + "loss": 0.4459, + "step": 4136 + }, + { + "epoch": 2.75, + "learning_rate": 4.225483972842598e-06, + "loss": 0.4132, + "step": 4137 + }, + { + "epoch": 2.75, + "learning_rate": 4.221159531252375e-06, + "loss": 0.423, + "step": 4138 + }, + { + "epoch": 2.75, + "learning_rate": 4.21683671158822e-06, + "loss": 0.4428, + "step": 4139 + }, + { + "epoch": 2.75, + "learning_rate": 4.212515515063399e-06, + "loss": 0.4683, + "step": 4140 + }, + { + "epoch": 2.75, + "learning_rate": 4.2081959428907344e-06, + "loss": 0.4144, + "step": 4141 + }, + { + "epoch": 2.75, + "learning_rate": 4.203877996282577e-06, + "loss": 0.4614, + "step": 4142 + }, + { + "epoch": 2.75, + "learning_rate": 4.199561676450837e-06, + "loss": 0.4072, + "step": 4143 + }, + { + "epoch": 2.75, + "learning_rate": 4.195246984606957e-06, + "loss": 0.4281, + "step": 4144 + }, + { + "epoch": 2.76, + "learning_rate": 4.1909339219619225e-06, + "loss": 0.3942, + "step": 4145 + }, + { + "epoch": 2.76, + "learning_rate": 4.186622489726265e-06, + "loss": 0.4136, + "step": 4146 + }, + { + "epoch": 2.76, + "learning_rate": 4.182312689110062e-06, + "loss": 0.4671, + "step": 4147 + }, + { + "epoch": 2.76, + "learning_rate": 4.1780045213229316e-06, + "loss": 0.4197, + "step": 4148 + }, + { + "epoch": 2.76, + "learning_rate": 4.173697987574028e-06, + "loss": 0.4237, + "step": 4149 + }, + { + "epoch": 2.76, + "learning_rate": 4.16939308907205e-06, + "loss": 0.4691, + "step": 4150 + }, + { + "epoch": 2.76, + "learning_rate": 4.165089827025236e-06, + "loss": 0.4331, + "step": 4151 + }, + { + "epoch": 2.76, + "learning_rate": 4.160788202641373e-06, + "loss": 0.4336, + "step": 4152 + }, + { + "epoch": 2.76, + "learning_rate": 4.156488217127774e-06, + "loss": 0.4559, + "step": 4153 + }, + { + "epoch": 2.76, + "learning_rate": 4.152189871691306e-06, + "loss": 0.4417, + "step": 4154 + }, + { + "epoch": 2.76, + "learning_rate": 4.147893167538375e-06, + "loss": 0.4707, + "step": 4155 + }, + { + "epoch": 2.76, + "learning_rate": 4.143598105874908e-06, + "loss": 0.4617, + "step": 4156 + }, + { + "epoch": 2.76, + "learning_rate": 4.139304687906395e-06, + "loss": 0.4391, + "step": 4157 + }, + { + "epoch": 2.76, + "learning_rate": 4.135012914837846e-06, + "loss": 0.4303, + "step": 4158 + }, + { + "epoch": 2.76, + "learning_rate": 4.130722787873825e-06, + "loss": 0.4488, + "step": 4159 + }, + { + "epoch": 2.77, + "learning_rate": 4.126434308218421e-06, + "loss": 0.4475, + "step": 4160 + }, + { + "epoch": 2.77, + "learning_rate": 4.12214747707527e-06, + "loss": 0.4476, + "step": 4161 + }, + { + "epoch": 2.77, + "learning_rate": 4.117862295647539e-06, + "loss": 0.4312, + "step": 4162 + }, + { + "epoch": 2.77, + "learning_rate": 4.113578765137931e-06, + "loss": 0.4398, + "step": 4163 + }, + { + "epoch": 2.77, + "learning_rate": 4.109296886748695e-06, + "loss": 0.4868, + "step": 4164 + }, + { + "epoch": 2.77, + "learning_rate": 4.105016661681605e-06, + "loss": 0.4264, + "step": 4165 + }, + { + "epoch": 2.77, + "learning_rate": 4.10073809113798e-06, + "loss": 0.4519, + "step": 4166 + }, + { + "epoch": 2.77, + "learning_rate": 4.096461176318671e-06, + "loss": 0.4329, + "step": 4167 + }, + { + "epoch": 2.77, + "learning_rate": 4.092185918424057e-06, + "loss": 0.4167, + "step": 4168 + }, + { + "epoch": 2.77, + "learning_rate": 4.087912318654071e-06, + "loss": 0.4253, + "step": 4169 + }, + { + "epoch": 2.77, + "learning_rate": 4.083640378208156e-06, + "loss": 0.436, + "step": 4170 + }, + { + "epoch": 2.77, + "learning_rate": 4.079370098285311e-06, + "loss": 0.4489, + "step": 4171 + }, + { + "epoch": 2.77, + "learning_rate": 4.075101480084058e-06, + "loss": 0.4391, + "step": 4172 + }, + { + "epoch": 2.77, + "learning_rate": 4.07083452480245e-06, + "loss": 0.4768, + "step": 4173 + }, + { + "epoch": 2.77, + "learning_rate": 4.066569233638083e-06, + "loss": 0.4319, + "step": 4174 + }, + { + "epoch": 2.78, + "learning_rate": 4.0623056077880775e-06, + "loss": 0.4375, + "step": 4175 + }, + { + "epoch": 2.78, + "learning_rate": 4.0580436484490925e-06, + "loss": 0.451, + "step": 4176 + }, + { + "epoch": 2.78, + "learning_rate": 4.053783356817313e-06, + "loss": 0.4474, + "step": 4177 + }, + { + "epoch": 2.78, + "learning_rate": 4.0495247340884684e-06, + "loss": 0.4197, + "step": 4178 + }, + { + "epoch": 2.78, + "learning_rate": 4.045267781457797e-06, + "loss": 0.4391, + "step": 4179 + }, + { + "epoch": 2.78, + "learning_rate": 4.04101250012009e-06, + "loss": 0.4508, + "step": 4180 + }, + { + "epoch": 2.78, + "learning_rate": 4.036758891269663e-06, + "loss": 0.4634, + "step": 4181 + }, + { + "epoch": 2.78, + "learning_rate": 4.032506956100356e-06, + "loss": 0.4542, + "step": 4182 + }, + { + "epoch": 2.78, + "learning_rate": 4.0282566958055505e-06, + "loss": 0.4723, + "step": 4183 + }, + { + "epoch": 2.78, + "learning_rate": 4.024008111578147e-06, + "loss": 0.4217, + "step": 4184 + }, + { + "epoch": 2.78, + "learning_rate": 4.0197612046105815e-06, + "loss": 0.4617, + "step": 4185 + }, + { + "epoch": 2.78, + "learning_rate": 4.015515976094815e-06, + "loss": 0.4487, + "step": 4186 + }, + { + "epoch": 2.78, + "learning_rate": 4.011272427222345e-06, + "loss": 0.4489, + "step": 4187 + }, + { + "epoch": 2.78, + "learning_rate": 4.0070305591841885e-06, + "loss": 0.4625, + "step": 4188 + }, + { + "epoch": 2.78, + "learning_rate": 4.002790373170896e-06, + "loss": 0.4363, + "step": 4189 + }, + { + "epoch": 2.79, + "learning_rate": 3.998551870372554e-06, + "loss": 0.4572, + "step": 4190 + }, + { + "epoch": 2.79, + "learning_rate": 3.994315051978753e-06, + "loss": 0.4542, + "step": 4191 + }, + { + "epoch": 2.79, + "learning_rate": 3.990079919178636e-06, + "loss": 0.4342, + "step": 4192 + }, + { + "epoch": 2.79, + "learning_rate": 3.9858464731608545e-06, + "loss": 0.4327, + "step": 4193 + }, + { + "epoch": 2.79, + "learning_rate": 3.981614715113602e-06, + "loss": 0.4045, + "step": 4194 + }, + { + "epoch": 2.79, + "learning_rate": 3.977384646224584e-06, + "loss": 0.4705, + "step": 4195 + }, + { + "epoch": 2.79, + "learning_rate": 3.973156267681044e-06, + "loss": 0.4499, + "step": 4196 + }, + { + "epoch": 2.79, + "learning_rate": 3.968929580669743e-06, + "loss": 0.4813, + "step": 4197 + }, + { + "epoch": 2.79, + "learning_rate": 3.9647045863769685e-06, + "loss": 0.4303, + "step": 4198 + }, + { + "epoch": 2.79, + "learning_rate": 3.960481285988538e-06, + "loss": 0.4538, + "step": 4199 + }, + { + "epoch": 2.79, + "learning_rate": 3.956259680689784e-06, + "loss": 0.4587, + "step": 4200 + }, + { + "epoch": 2.79, + "learning_rate": 3.952039771665575e-06, + "loss": 0.4618, + "step": 4201 + }, + { + "epoch": 2.79, + "learning_rate": 3.9478215601002955e-06, + "loss": 0.4581, + "step": 4202 + }, + { + "epoch": 2.79, + "learning_rate": 3.943605047177852e-06, + "loss": 0.4384, + "step": 4203 + }, + { + "epoch": 2.8, + "learning_rate": 3.9393902340816835e-06, + "loss": 0.4719, + "step": 4204 + }, + { + "epoch": 2.8, + "learning_rate": 3.935177121994741e-06, + "loss": 0.4446, + "step": 4205 + }, + { + "epoch": 2.8, + "learning_rate": 3.930965712099508e-06, + "loss": 0.3973, + "step": 4206 + }, + { + "epoch": 2.8, + "learning_rate": 3.926756005577984e-06, + "loss": 0.4523, + "step": 4207 + }, + { + "epoch": 2.8, + "learning_rate": 3.922548003611688e-06, + "loss": 0.4173, + "step": 4208 + }, + { + "epoch": 2.8, + "learning_rate": 3.918341707381672e-06, + "loss": 0.4462, + "step": 4209 + }, + { + "epoch": 2.8, + "learning_rate": 3.9141371180684925e-06, + "loss": 0.4445, + "step": 4210 + }, + { + "epoch": 2.8, + "learning_rate": 3.909934236852246e-06, + "loss": 0.4312, + "step": 4211 + }, + { + "epoch": 2.8, + "learning_rate": 3.9057330649125325e-06, + "loss": 0.4166, + "step": 4212 + }, + { + "epoch": 2.8, + "learning_rate": 3.901533603428489e-06, + "loss": 0.4373, + "step": 4213 + }, + { + "epoch": 2.8, + "learning_rate": 3.897335853578748e-06, + "loss": 0.4164, + "step": 4214 + }, + { + "epoch": 2.8, + "learning_rate": 3.893139816541487e-06, + "loss": 0.4386, + "step": 4215 + }, + { + "epoch": 2.8, + "learning_rate": 3.888945493494393e-06, + "loss": 0.4224, + "step": 4216 + }, + { + "epoch": 2.8, + "learning_rate": 3.884752885614665e-06, + "loss": 0.454, + "step": 4217 + }, + { + "epoch": 2.8, + "learning_rate": 3.8805619940790355e-06, + "loss": 0.4281, + "step": 4218 + }, + { + "epoch": 2.81, + "learning_rate": 3.876372820063741e-06, + "loss": 0.4241, + "step": 4219 + }, + { + "epoch": 2.81, + "learning_rate": 3.872185364744543e-06, + "loss": 0.464, + "step": 4220 + }, + { + "epoch": 2.81, + "learning_rate": 3.867999629296715e-06, + "loss": 0.4272, + "step": 4221 + }, + { + "epoch": 2.81, + "learning_rate": 3.863815614895056e-06, + "loss": 0.418, + "step": 4222 + }, + { + "epoch": 2.81, + "learning_rate": 3.859633322713882e-06, + "loss": 0.4392, + "step": 4223 + }, + { + "epoch": 2.81, + "learning_rate": 3.855452753927018e-06, + "loss": 0.4264, + "step": 4224 + }, + { + "epoch": 2.81, + "learning_rate": 3.851273909707809e-06, + "loss": 0.4696, + "step": 4225 + }, + { + "epoch": 2.81, + "learning_rate": 3.847096791229111e-06, + "loss": 0.3982, + "step": 4226 + }, + { + "epoch": 2.81, + "learning_rate": 3.842921399663309e-06, + "loss": 0.4344, + "step": 4227 + }, + { + "epoch": 2.81, + "learning_rate": 3.838747736182289e-06, + "loss": 0.4585, + "step": 4228 + }, + { + "epoch": 2.81, + "learning_rate": 3.834575801957463e-06, + "loss": 0.408, + "step": 4229 + }, + { + "epoch": 2.81, + "learning_rate": 3.8304055981597495e-06, + "loss": 0.3865, + "step": 4230 + }, + { + "epoch": 2.81, + "learning_rate": 3.826237125959582e-06, + "loss": 0.4493, + "step": 4231 + }, + { + "epoch": 2.81, + "learning_rate": 3.822070386526916e-06, + "loss": 0.4511, + "step": 4232 + }, + { + "epoch": 2.81, + "learning_rate": 3.817905381031208e-06, + "loss": 0.4405, + "step": 4233 + }, + { + "epoch": 2.82, + "learning_rate": 3.813742110641443e-06, + "loss": 0.4229, + "step": 4234 + }, + { + "epoch": 2.82, + "learning_rate": 3.809580576526104e-06, + "loss": 0.4621, + "step": 4235 + }, + { + "epoch": 2.82, + "learning_rate": 3.8054207798531995e-06, + "loss": 0.407, + "step": 4236 + }, + { + "epoch": 2.82, + "learning_rate": 3.8012627217902408e-06, + "loss": 0.4144, + "step": 4237 + }, + { + "epoch": 2.82, + "learning_rate": 3.7971064035042515e-06, + "loss": 0.4219, + "step": 4238 + }, + { + "epoch": 2.82, + "learning_rate": 3.7929518261617794e-06, + "loss": 0.4361, + "step": 4239 + }, + { + "epoch": 2.82, + "learning_rate": 3.7887989909288648e-06, + "loss": 0.451, + "step": 4240 + }, + { + "epoch": 2.82, + "learning_rate": 3.7846478989710776e-06, + "loss": 0.4412, + "step": 4241 + }, + { + "epoch": 2.82, + "learning_rate": 3.7804985514534853e-06, + "loss": 0.4464, + "step": 4242 + }, + { + "epoch": 2.82, + "learning_rate": 3.776350949540666e-06, + "loss": 0.457, + "step": 4243 + }, + { + "epoch": 2.82, + "learning_rate": 3.7722050943967203e-06, + "loss": 0.4425, + "step": 4244 + }, + { + "epoch": 2.82, + "learning_rate": 3.7680609871852436e-06, + "loss": 0.4208, + "step": 4245 + }, + { + "epoch": 2.82, + "learning_rate": 3.763918629069352e-06, + "loss": 0.4589, + "step": 4246 + }, + { + "epoch": 2.82, + "learning_rate": 3.7597780212116653e-06, + "loss": 0.4631, + "step": 4247 + }, + { + "epoch": 2.82, + "learning_rate": 3.7556391647743074e-06, + "loss": 0.478, + "step": 4248 + }, + { + "epoch": 2.83, + "learning_rate": 3.7515020609189234e-06, + "loss": 0.4755, + "step": 4249 + }, + { + "epoch": 2.83, + "learning_rate": 3.7473667108066524e-06, + "loss": 0.4718, + "step": 4250 + }, + { + "epoch": 2.83, + "learning_rate": 3.743233115598156e-06, + "loss": 0.4399, + "step": 4251 + }, + { + "epoch": 2.83, + "learning_rate": 3.739101276453586e-06, + "loss": 0.4437, + "step": 4252 + }, + { + "epoch": 2.83, + "learning_rate": 3.7349711945326238e-06, + "loss": 0.4421, + "step": 4253 + }, + { + "epoch": 2.83, + "learning_rate": 3.730842870994428e-06, + "loss": 0.4646, + "step": 4254 + }, + { + "epoch": 2.83, + "learning_rate": 3.726716306997692e-06, + "loss": 0.4549, + "step": 4255 + }, + { + "epoch": 2.83, + "learning_rate": 3.7225915037005966e-06, + "loss": 0.4529, + "step": 4256 + }, + { + "epoch": 2.83, + "learning_rate": 3.718468462260838e-06, + "loss": 0.4405, + "step": 4257 + }, + { + "epoch": 2.83, + "learning_rate": 3.7143471838356182e-06, + "loss": 0.4525, + "step": 4258 + }, + { + "epoch": 2.83, + "learning_rate": 3.7102276695816397e-06, + "loss": 0.4476, + "step": 4259 + }, + { + "epoch": 2.83, + "learning_rate": 3.70610992065511e-06, + "loss": 0.4759, + "step": 4260 + }, + { + "epoch": 2.83, + "learning_rate": 3.7019939382117397e-06, + "loss": 0.4314, + "step": 4261 + }, + { + "epoch": 2.83, + "learning_rate": 3.6978797234067533e-06, + "loss": 0.4395, + "step": 4262 + }, + { + "epoch": 2.83, + "learning_rate": 3.6937672773948663e-06, + "loss": 0.4323, + "step": 4263 + }, + { + "epoch": 2.84, + "learning_rate": 3.689656601330307e-06, + "loss": 0.4383, + "step": 4264 + }, + { + "epoch": 2.84, + "learning_rate": 3.68554769636681e-06, + "loss": 0.4169, + "step": 4265 + }, + { + "epoch": 2.84, + "learning_rate": 3.6814405636575935e-06, + "loss": 0.4528, + "step": 4266 + }, + { + "epoch": 2.84, + "learning_rate": 3.677335204355401e-06, + "loss": 0.4464, + "step": 4267 + }, + { + "epoch": 2.84, + "learning_rate": 3.673231619612464e-06, + "loss": 0.4339, + "step": 4268 + }, + { + "epoch": 2.84, + "learning_rate": 3.669129810580525e-06, + "loss": 0.4291, + "step": 4269 + }, + { + "epoch": 2.84, + "learning_rate": 3.665029778410819e-06, + "loss": 0.459, + "step": 4270 + }, + { + "epoch": 2.84, + "learning_rate": 3.660931524254092e-06, + "loss": 0.4409, + "step": 4271 + }, + { + "epoch": 2.84, + "learning_rate": 3.656835049260584e-06, + "loss": 0.4234, + "step": 4272 + }, + { + "epoch": 2.84, + "learning_rate": 3.6527403545800344e-06, + "loss": 0.4002, + "step": 4273 + }, + { + "epoch": 2.84, + "learning_rate": 3.6486474413616913e-06, + "loss": 0.4492, + "step": 4274 + }, + { + "epoch": 2.84, + "learning_rate": 3.6445563107542925e-06, + "loss": 0.4307, + "step": 4275 + }, + { + "epoch": 2.84, + "learning_rate": 3.6404669639060875e-06, + "loss": 0.417, + "step": 4276 + }, + { + "epoch": 2.84, + "learning_rate": 3.6363794019648147e-06, + "loss": 0.4121, + "step": 4277 + }, + { + "epoch": 2.84, + "learning_rate": 3.632293626077711e-06, + "loss": 0.4349, + "step": 4278 + }, + { + "epoch": 2.85, + "learning_rate": 3.628209637391523e-06, + "loss": 0.4208, + "step": 4279 + }, + { + "epoch": 2.85, + "learning_rate": 3.624127437052484e-06, + "loss": 0.4198, + "step": 4280 + }, + { + "epoch": 2.85, + "learning_rate": 3.620047026206335e-06, + "loss": 0.4568, + "step": 4281 + }, + { + "epoch": 2.85, + "learning_rate": 3.615968405998308e-06, + "loss": 0.4398, + "step": 4282 + }, + { + "epoch": 2.85, + "learning_rate": 3.61189157757313e-06, + "loss": 0.4163, + "step": 4283 + }, + { + "epoch": 2.85, + "learning_rate": 3.6078165420750366e-06, + "loss": 0.4107, + "step": 4284 + }, + { + "epoch": 2.85, + "learning_rate": 3.6037433006477475e-06, + "loss": 0.4602, + "step": 4285 + }, + { + "epoch": 2.85, + "learning_rate": 3.59967185443449e-06, + "loss": 0.4634, + "step": 4286 + }, + { + "epoch": 2.85, + "learning_rate": 3.595602204577975e-06, + "loss": 0.4582, + "step": 4287 + }, + { + "epoch": 2.85, + "learning_rate": 3.5915343522204284e-06, + "loss": 0.4493, + "step": 4288 + }, + { + "epoch": 2.85, + "learning_rate": 3.5874682985035437e-06, + "loss": 0.4612, + "step": 4289 + }, + { + "epoch": 2.85, + "learning_rate": 3.5834040445685325e-06, + "loss": 0.4329, + "step": 4290 + }, + { + "epoch": 2.85, + "learning_rate": 3.579341591556099e-06, + "loss": 0.4494, + "step": 4291 + }, + { + "epoch": 2.85, + "learning_rate": 3.575280940606429e-06, + "loss": 0.46, + "step": 4292 + }, + { + "epoch": 2.85, + "learning_rate": 3.571222092859218e-06, + "loss": 0.4479, + "step": 4293 + }, + { + "epoch": 2.86, + "learning_rate": 3.567165049453644e-06, + "loss": 0.431, + "step": 4294 + }, + { + "epoch": 2.86, + "learning_rate": 3.5631098115283833e-06, + "loss": 0.4712, + "step": 4295 + }, + { + "epoch": 2.86, + "learning_rate": 3.5590563802216004e-06, + "loss": 0.4498, + "step": 4296 + }, + { + "epoch": 2.86, + "learning_rate": 3.5550047566709646e-06, + "loss": 0.4396, + "step": 4297 + }, + { + "epoch": 2.86, + "learning_rate": 3.5509549420136235e-06, + "loss": 0.4477, + "step": 4298 + }, + { + "epoch": 2.86, + "learning_rate": 3.5469069373862296e-06, + "loss": 0.4312, + "step": 4299 + }, + { + "epoch": 2.86, + "learning_rate": 3.5428607439249197e-06, + "loss": 0.437, + "step": 4300 + }, + { + "epoch": 2.86, + "learning_rate": 3.5388163627653203e-06, + "loss": 0.4517, + "step": 4301 + }, + { + "epoch": 2.86, + "learning_rate": 3.5347737950425587e-06, + "loss": 0.44, + "step": 4302 + }, + { + "epoch": 2.86, + "learning_rate": 3.5307330418912424e-06, + "loss": 0.4242, + "step": 4303 + }, + { + "epoch": 2.86, + "learning_rate": 3.5266941044454816e-06, + "loss": 0.4391, + "step": 4304 + }, + { + "epoch": 2.86, + "learning_rate": 3.5226569838388647e-06, + "loss": 0.4229, + "step": 4305 + }, + { + "epoch": 2.86, + "learning_rate": 3.518621681204475e-06, + "loss": 0.4624, + "step": 4306 + }, + { + "epoch": 2.86, + "learning_rate": 3.514588197674891e-06, + "loss": 0.4474, + "step": 4307 + }, + { + "epoch": 2.87, + "learning_rate": 3.510556534382169e-06, + "loss": 0.4697, + "step": 4308 + }, + { + "epoch": 2.87, + "learning_rate": 3.50652669245787e-06, + "loss": 0.4441, + "step": 4309 + }, + { + "epoch": 2.87, + "learning_rate": 3.502498673033026e-06, + "loss": 0.3996, + "step": 4310 + }, + { + "epoch": 2.87, + "learning_rate": 3.498472477238174e-06, + "loss": 0.4416, + "step": 4311 + }, + { + "epoch": 2.87, + "learning_rate": 3.4944481062033287e-06, + "loss": 0.4109, + "step": 4312 + }, + { + "epoch": 2.87, + "learning_rate": 3.490425561057993e-06, + "loss": 0.4199, + "step": 4313 + }, + { + "epoch": 2.87, + "learning_rate": 3.4864048429311647e-06, + "loss": 0.4525, + "step": 4314 + }, + { + "epoch": 2.87, + "learning_rate": 3.482385952951318e-06, + "loss": 0.4347, + "step": 4315 + }, + { + "epoch": 2.87, + "learning_rate": 3.4783688922464275e-06, + "loss": 0.429, + "step": 4316 + }, + { + "epoch": 2.87, + "learning_rate": 3.4743536619439446e-06, + "loss": 0.4719, + "step": 4317 + }, + { + "epoch": 2.87, + "learning_rate": 3.470340263170804e-06, + "loss": 0.419, + "step": 4318 + }, + { + "epoch": 2.87, + "learning_rate": 3.466328697053438e-06, + "loss": 0.431, + "step": 4319 + }, + { + "epoch": 2.87, + "learning_rate": 3.4623189647177533e-06, + "loss": 0.4558, + "step": 4320 + }, + { + "epoch": 2.87, + "learning_rate": 3.458311067289153e-06, + "loss": 0.457, + "step": 4321 + }, + { + "epoch": 2.87, + "learning_rate": 3.454305005892515e-06, + "loss": 0.4181, + "step": 4322 + }, + { + "epoch": 2.88, + "learning_rate": 3.450300781652208e-06, + "loss": 0.4683, + "step": 4323 + }, + { + "epoch": 2.88, + "learning_rate": 3.446298395692077e-06, + "loss": 0.4823, + "step": 4324 + }, + { + "epoch": 2.88, + "learning_rate": 3.442297849135462e-06, + "loss": 0.4603, + "step": 4325 + }, + { + "epoch": 2.88, + "learning_rate": 3.4382991431051847e-06, + "loss": 0.4328, + "step": 4326 + }, + { + "epoch": 2.88, + "learning_rate": 3.43430227872354e-06, + "loss": 0.4391, + "step": 4327 + }, + { + "epoch": 2.88, + "learning_rate": 3.430307257112324e-06, + "loss": 0.4487, + "step": 4328 + }, + { + "epoch": 2.88, + "learning_rate": 3.4263140793927917e-06, + "loss": 0.454, + "step": 4329 + }, + { + "epoch": 2.88, + "learning_rate": 3.4223227466857045e-06, + "loss": 0.4178, + "step": 4330 + }, + { + "epoch": 2.88, + "learning_rate": 3.418333260111286e-06, + "loss": 0.4356, + "step": 4331 + }, + { + "epoch": 2.88, + "learning_rate": 3.4143456207892555e-06, + "loss": 0.4359, + "step": 4332 + }, + { + "epoch": 2.88, + "learning_rate": 3.410359829838814e-06, + "loss": 0.4786, + "step": 4333 + }, + { + "epoch": 2.88, + "learning_rate": 3.4063758883786334e-06, + "loss": 0.4715, + "step": 4334 + }, + { + "epoch": 2.88, + "learning_rate": 3.4023937975268728e-06, + "loss": 0.4947, + "step": 4335 + }, + { + "epoch": 2.88, + "learning_rate": 3.398413558401168e-06, + "loss": 0.4878, + "step": 4336 + }, + { + "epoch": 2.88, + "learning_rate": 3.394435172118644e-06, + "loss": 0.4288, + "step": 4337 + }, + { + "epoch": 2.89, + "learning_rate": 3.390458639795895e-06, + "loss": 0.4409, + "step": 4338 + }, + { + "epoch": 2.89, + "learning_rate": 3.386483962549004e-06, + "loss": 0.4346, + "step": 4339 + }, + { + "epoch": 2.89, + "learning_rate": 3.3825111414935287e-06, + "loss": 0.4387, + "step": 4340 + }, + { + "epoch": 2.89, + "learning_rate": 3.3785401777445003e-06, + "loss": 0.4444, + "step": 4341 + }, + { + "epoch": 2.89, + "learning_rate": 3.3745710724164437e-06, + "loss": 0.4477, + "step": 4342 + }, + { + "epoch": 2.89, + "learning_rate": 3.3706038266233456e-06, + "loss": 0.4481, + "step": 4343 + }, + { + "epoch": 2.89, + "learning_rate": 3.3666384414786834e-06, + "loss": 0.464, + "step": 4344 + }, + { + "epoch": 2.89, + "learning_rate": 3.3626749180954033e-06, + "loss": 0.467, + "step": 4345 + }, + { + "epoch": 2.89, + "learning_rate": 3.3587132575859383e-06, + "loss": 0.4718, + "step": 4346 + }, + { + "epoch": 2.89, + "learning_rate": 3.354753461062189e-06, + "loss": 0.4453, + "step": 4347 + }, + { + "epoch": 2.89, + "learning_rate": 3.3507955296355364e-06, + "loss": 0.4207, + "step": 4348 + }, + { + "epoch": 2.89, + "learning_rate": 3.346839464416842e-06, + "loss": 0.425, + "step": 4349 + }, + { + "epoch": 2.89, + "learning_rate": 3.342885266516436e-06, + "loss": 0.4642, + "step": 4350 + }, + { + "epoch": 2.89, + "learning_rate": 3.338932937044135e-06, + "loss": 0.4285, + "step": 4351 + }, + { + "epoch": 2.89, + "learning_rate": 3.33498247710922e-06, + "loss": 0.4545, + "step": 4352 + }, + { + "epoch": 2.9, + "learning_rate": 3.3310338878204507e-06, + "loss": 0.4381, + "step": 4353 + }, + { + "epoch": 2.9, + "learning_rate": 3.3270871702860686e-06, + "loss": 0.4506, + "step": 4354 + }, + { + "epoch": 2.9, + "learning_rate": 3.3231423256137784e-06, + "loss": 0.4668, + "step": 4355 + }, + { + "epoch": 2.9, + "learning_rate": 3.3191993549107725e-06, + "loss": 0.4344, + "step": 4356 + }, + { + "epoch": 2.9, + "learning_rate": 3.3152582592837058e-06, + "loss": 0.4408, + "step": 4357 + }, + { + "epoch": 2.9, + "learning_rate": 3.3113190398387076e-06, + "loss": 0.432, + "step": 4358 + }, + { + "epoch": 2.9, + "learning_rate": 3.307381697681392e-06, + "loss": 0.426, + "step": 4359 + }, + { + "epoch": 2.9, + "learning_rate": 3.3034462339168317e-06, + "loss": 0.439, + "step": 4360 + }, + { + "epoch": 2.9, + "learning_rate": 3.2995126496495857e-06, + "loss": 0.4136, + "step": 4361 + }, + { + "epoch": 2.9, + "learning_rate": 3.295580945983671e-06, + "loss": 0.4472, + "step": 4362 + }, + { + "epoch": 2.9, + "learning_rate": 3.2916511240225958e-06, + "loss": 0.4354, + "step": 4363 + }, + { + "epoch": 2.9, + "learning_rate": 3.2877231848693134e-06, + "loss": 0.47, + "step": 4364 + }, + { + "epoch": 2.9, + "learning_rate": 3.283797129626274e-06, + "loss": 0.4369, + "step": 4365 + }, + { + "epoch": 2.9, + "learning_rate": 3.2798729593953903e-06, + "loss": 0.4427, + "step": 4366 + }, + { + "epoch": 2.9, + "learning_rate": 3.275950675278039e-06, + "loss": 0.4411, + "step": 4367 + }, + { + "epoch": 2.91, + "learning_rate": 3.272030278375079e-06, + "loss": 0.4469, + "step": 4368 + }, + { + "epoch": 2.91, + "learning_rate": 3.2681117697868325e-06, + "loss": 0.4536, + "step": 4369 + }, + { + "epoch": 2.91, + "learning_rate": 3.264195150613091e-06, + "loss": 0.4256, + "step": 4370 + }, + { + "epoch": 2.91, + "learning_rate": 3.260280421953115e-06, + "loss": 0.4578, + "step": 4371 + }, + { + "epoch": 2.91, + "learning_rate": 3.256367584905643e-06, + "loss": 0.4293, + "step": 4372 + }, + { + "epoch": 2.91, + "learning_rate": 3.2524566405688716e-06, + "loss": 0.4374, + "step": 4373 + }, + { + "epoch": 2.91, + "learning_rate": 3.248547590040477e-06, + "loss": 0.4327, + "step": 4374 + }, + { + "epoch": 2.91, + "learning_rate": 3.244640434417595e-06, + "loss": 0.4799, + "step": 4375 + }, + { + "epoch": 2.91, + "learning_rate": 3.24073517479683e-06, + "loss": 0.4319, + "step": 4376 + }, + { + "epoch": 2.91, + "learning_rate": 3.2368318122742628e-06, + "loss": 0.4631, + "step": 4377 + }, + { + "epoch": 2.91, + "learning_rate": 3.232930347945429e-06, + "loss": 0.4231, + "step": 4378 + }, + { + "epoch": 2.91, + "learning_rate": 3.2290307829053456e-06, + "loss": 0.4184, + "step": 4379 + }, + { + "epoch": 2.91, + "learning_rate": 3.2251331182484868e-06, + "loss": 0.4542, + "step": 4380 + }, + { + "epoch": 2.91, + "learning_rate": 3.2212373550687903e-06, + "loss": 0.4319, + "step": 4381 + }, + { + "epoch": 2.91, + "learning_rate": 3.2173434944596747e-06, + "loss": 0.4312, + "step": 4382 + }, + { + "epoch": 2.92, + "learning_rate": 3.213451537514007e-06, + "loss": 0.4563, + "step": 4383 + }, + { + "epoch": 2.92, + "learning_rate": 3.2095614853241376e-06, + "loss": 0.4362, + "step": 4384 + }, + { + "epoch": 2.92, + "learning_rate": 3.205673338981865e-06, + "loss": 0.449, + "step": 4385 + }, + { + "epoch": 2.92, + "learning_rate": 3.201787099578467e-06, + "loss": 0.4091, + "step": 4386 + }, + { + "epoch": 2.92, + "learning_rate": 3.197902768204678e-06, + "loss": 0.4509, + "step": 4387 + }, + { + "epoch": 2.92, + "learning_rate": 3.194020345950697e-06, + "loss": 0.4645, + "step": 4388 + }, + { + "epoch": 2.92, + "learning_rate": 3.190139833906193e-06, + "loss": 0.4458, + "step": 4389 + }, + { + "epoch": 2.92, + "learning_rate": 3.1862612331602906e-06, + "loss": 0.4272, + "step": 4390 + }, + { + "epoch": 2.92, + "learning_rate": 3.182384544801589e-06, + "loss": 0.4513, + "step": 4391 + }, + { + "epoch": 2.92, + "learning_rate": 3.17850976991814e-06, + "loss": 0.4219, + "step": 4392 + }, + { + "epoch": 2.92, + "learning_rate": 3.1746369095974594e-06, + "loss": 0.4353, + "step": 4393 + }, + { + "epoch": 2.92, + "learning_rate": 3.1707659649265367e-06, + "loss": 0.4773, + "step": 4394 + }, + { + "epoch": 2.92, + "learning_rate": 3.166896936991808e-06, + "loss": 0.4513, + "step": 4395 + }, + { + "epoch": 2.92, + "learning_rate": 3.163029826879186e-06, + "loss": 0.4285, + "step": 4396 + }, + { + "epoch": 2.93, + "learning_rate": 3.159164635674035e-06, + "loss": 0.4718, + "step": 4397 + }, + { + "epoch": 2.93, + "learning_rate": 3.155301364461184e-06, + "loss": 0.4337, + "step": 4398 + }, + { + "epoch": 2.93, + "learning_rate": 3.1514400143249203e-06, + "loss": 0.4778, + "step": 4399 + }, + { + "epoch": 2.93, + "learning_rate": 3.147580586348998e-06, + "loss": 0.4533, + "step": 4400 + }, + { + "epoch": 2.93, + "learning_rate": 3.143723081616633e-06, + "loss": 0.467, + "step": 4401 + }, + { + "epoch": 2.93, + "learning_rate": 3.1398675012104885e-06, + "loss": 0.4248, + "step": 4402 + }, + { + "epoch": 2.93, + "learning_rate": 3.1360138462127075e-06, + "loss": 0.4543, + "step": 4403 + }, + { + "epoch": 2.93, + "learning_rate": 3.132162117704869e-06, + "loss": 0.4144, + "step": 4404 + }, + { + "epoch": 2.93, + "learning_rate": 3.1283123167680306e-06, + "loss": 0.4495, + "step": 4405 + }, + { + "epoch": 2.93, + "learning_rate": 3.1244644444826975e-06, + "loss": 0.454, + "step": 4406 + }, + { + "epoch": 2.93, + "learning_rate": 3.1206185019288414e-06, + "loss": 0.4439, + "step": 4407 + }, + { + "epoch": 2.93, + "learning_rate": 3.116774490185891e-06, + "loss": 0.4164, + "step": 4408 + }, + { + "epoch": 2.93, + "learning_rate": 3.1129324103327284e-06, + "loss": 0.4221, + "step": 4409 + }, + { + "epoch": 2.93, + "learning_rate": 3.1090922634476963e-06, + "loss": 0.4739, + "step": 4410 + }, + { + "epoch": 2.93, + "learning_rate": 3.1052540506085903e-06, + "loss": 0.44, + "step": 4411 + }, + { + "epoch": 2.94, + "learning_rate": 3.1014177728926755e-06, + "loss": 0.4639, + "step": 4412 + }, + { + "epoch": 2.94, + "learning_rate": 3.0975834313766594e-06, + "loss": 0.4563, + "step": 4413 + }, + { + "epoch": 2.94, + "learning_rate": 3.0937510271367177e-06, + "loss": 0.4436, + "step": 4414 + }, + { + "epoch": 2.94, + "learning_rate": 3.089920561248476e-06, + "loss": 0.4189, + "step": 4415 + }, + { + "epoch": 2.94, + "learning_rate": 3.0860920347870127e-06, + "loss": 0.4294, + "step": 4416 + }, + { + "epoch": 2.94, + "learning_rate": 3.0822654488268733e-06, + "loss": 0.4462, + "step": 4417 + }, + { + "epoch": 2.94, + "learning_rate": 3.0784408044420456e-06, + "loss": 0.4727, + "step": 4418 + }, + { + "epoch": 2.94, + "learning_rate": 3.074618102705984e-06, + "loss": 0.4233, + "step": 4419 + }, + { + "epoch": 2.94, + "learning_rate": 3.0707973446915863e-06, + "loss": 0.4338, + "step": 4420 + }, + { + "epoch": 2.94, + "learning_rate": 3.0669785314712173e-06, + "loss": 0.4456, + "step": 4421 + }, + { + "epoch": 2.94, + "learning_rate": 3.063161664116686e-06, + "loss": 0.4378, + "step": 4422 + }, + { + "epoch": 2.94, + "learning_rate": 3.0593467436992565e-06, + "loss": 0.4272, + "step": 4423 + }, + { + "epoch": 2.94, + "learning_rate": 3.0555337712896527e-06, + "loss": 0.466, + "step": 4424 + }, + { + "epoch": 2.94, + "learning_rate": 3.0517227479580425e-06, + "loss": 0.4391, + "step": 4425 + }, + { + "epoch": 2.94, + "learning_rate": 3.047913674774059e-06, + "loss": 0.426, + "step": 4426 + }, + { + "epoch": 2.95, + "learning_rate": 3.044106552806777e-06, + "loss": 0.4233, + "step": 4427 + }, + { + "epoch": 2.95, + "learning_rate": 3.0403013831247243e-06, + "loss": 0.4555, + "step": 4428 + }, + { + "epoch": 2.95, + "learning_rate": 3.0364981667958903e-06, + "loss": 0.4368, + "step": 4429 + }, + { + "epoch": 2.95, + "learning_rate": 3.0326969048877032e-06, + "loss": 0.4209, + "step": 4430 + }, + { + "epoch": 2.95, + "learning_rate": 3.0288975984670564e-06, + "loss": 0.4147, + "step": 4431 + }, + { + "epoch": 2.95, + "learning_rate": 3.0251002486002843e-06, + "loss": 0.4626, + "step": 4432 + }, + { + "epoch": 2.95, + "learning_rate": 3.0213048563531713e-06, + "loss": 0.4124, + "step": 4433 + }, + { + "epoch": 2.95, + "learning_rate": 3.017511422790963e-06, + "loss": 0.4729, + "step": 4434 + }, + { + "epoch": 2.95, + "learning_rate": 3.013719948978342e-06, + "loss": 0.4275, + "step": 4435 + }, + { + "epoch": 2.95, + "learning_rate": 3.0099304359794536e-06, + "loss": 0.4516, + "step": 4436 + }, + { + "epoch": 2.95, + "learning_rate": 3.006142884857881e-06, + "loss": 0.4363, + "step": 4437 + }, + { + "epoch": 2.95, + "learning_rate": 3.002357296676672e-06, + "loss": 0.4255, + "step": 4438 + }, + { + "epoch": 2.95, + "learning_rate": 2.9985736724982995e-06, + "loss": 0.4323, + "step": 4439 + }, + { + "epoch": 2.95, + "learning_rate": 2.9947920133847108e-06, + "loss": 0.4297, + "step": 4440 + }, + { + "epoch": 2.95, + "learning_rate": 2.991012320397283e-06, + "loss": 0.457, + "step": 4441 + }, + { + "epoch": 2.96, + "learning_rate": 2.9872345945968528e-06, + "loss": 0.4242, + "step": 4442 + }, + { + "epoch": 2.96, + "learning_rate": 2.9834588370437035e-06, + "loss": 0.4239, + "step": 4443 + }, + { + "epoch": 2.96, + "learning_rate": 2.9796850487975595e-06, + "loss": 0.4534, + "step": 4444 + }, + { + "epoch": 2.96, + "learning_rate": 2.975913230917595e-06, + "loss": 0.4305, + "step": 4445 + }, + { + "epoch": 2.96, + "learning_rate": 2.9721433844624327e-06, + "loss": 0.4283, + "step": 4446 + }, + { + "epoch": 2.96, + "learning_rate": 2.968375510490146e-06, + "loss": 0.4348, + "step": 4447 + }, + { + "epoch": 2.96, + "learning_rate": 2.9646096100582423e-06, + "loss": 0.4003, + "step": 4448 + }, + { + "epoch": 2.96, + "learning_rate": 2.9608456842236923e-06, + "loss": 0.4318, + "step": 4449 + }, + { + "epoch": 2.96, + "learning_rate": 2.9570837340428994e-06, + "loss": 0.473, + "step": 4450 + }, + { + "epoch": 2.96, + "learning_rate": 2.9533237605717125e-06, + "loss": 0.4205, + "step": 4451 + }, + { + "epoch": 2.96, + "learning_rate": 2.9495657648654364e-06, + "loss": 0.4145, + "step": 4452 + }, + { + "epoch": 2.96, + "learning_rate": 2.9458097479788073e-06, + "loss": 0.4234, + "step": 4453 + }, + { + "epoch": 2.96, + "learning_rate": 2.9420557109660197e-06, + "loss": 0.4268, + "step": 4454 + }, + { + "epoch": 2.96, + "learning_rate": 2.938303654880702e-06, + "loss": 0.4549, + "step": 4455 + } + ], + "logging_steps": 1, + "max_steps": 5940, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 1485, + "total_flos": 7210953804349440.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}