{ "best_metric": 0.9305555555555556, "best_model_checkpoint": "vit-base-patch16-224-Soybean_11-46/checkpoint-473", "epoch": 50.0, "global_step": 550, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 9.09090909090909e-07, "loss": 1.677, "step": 1 }, { "epoch": 0.18, "learning_rate": 1.818181818181818e-06, "loss": 1.663, "step": 2 }, { "epoch": 0.27, "learning_rate": 2.7272727272727272e-06, "loss": 1.6847, "step": 3 }, { "epoch": 0.36, "learning_rate": 3.636363636363636e-06, "loss": 1.7074, "step": 4 }, { "epoch": 0.45, "learning_rate": 4.5454545454545455e-06, "loss": 1.6866, "step": 5 }, { "epoch": 0.55, "learning_rate": 5.4545454545454545e-06, "loss": 1.571, "step": 6 }, { "epoch": 0.64, "learning_rate": 6.363636363636363e-06, "loss": 1.5806, "step": 7 }, { "epoch": 0.73, "learning_rate": 7.272727272727272e-06, "loss": 1.5372, "step": 8 }, { "epoch": 0.82, "learning_rate": 8.181818181818183e-06, "loss": 1.51, "step": 9 }, { "epoch": 0.91, "learning_rate": 9.090909090909091e-06, "loss": 1.4441, "step": 10 }, { "epoch": 1.0, "learning_rate": 1e-05, "loss": 1.3661, "step": 11 }, { "epoch": 1.0, "eval_accuracy": 0.5069444444444444, "eval_loss": 1.3697991371154785, "eval_runtime": 1.9108, "eval_samples_per_second": 150.721, "eval_steps_per_second": 2.617, "step": 11 }, { "epoch": 1.09, "learning_rate": 1.0909090909090909e-05, "loss": 1.386, "step": 12 }, { "epoch": 1.18, "learning_rate": 1.1818181818181819e-05, "loss": 1.4072, "step": 13 }, { "epoch": 1.27, "learning_rate": 1.2727272727272727e-05, "loss": 1.3612, "step": 14 }, { "epoch": 1.36, "learning_rate": 1.3636363636363637e-05, "loss": 1.2649, "step": 15 }, { "epoch": 1.45, "learning_rate": 1.4545454545454545e-05, "loss": 1.2608, "step": 16 }, { "epoch": 1.55, "learning_rate": 1.5454545454545454e-05, "loss": 1.2141, "step": 17 }, { "epoch": 1.64, "learning_rate": 1.6363636363636366e-05, "loss": 1.1708, "step": 18 }, { "epoch": 1.73, "learning_rate": 1.7272727272727274e-05, "loss": 1.1569, "step": 19 }, { "epoch": 1.82, "learning_rate": 1.8181818181818182e-05, "loss": 1.0257, "step": 20 }, { "epoch": 1.91, "learning_rate": 1.9090909090909094e-05, "loss": 1.0738, "step": 21 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.9979, "step": 22 }, { "epoch": 2.0, "eval_accuracy": 0.6631944444444444, "eval_loss": 0.9816781282424927, "eval_runtime": 1.5585, "eval_samples_per_second": 184.794, "eval_steps_per_second": 3.208, "step": 22 }, { "epoch": 2.09, "learning_rate": 2.090909090909091e-05, "loss": 0.9297, "step": 23 }, { "epoch": 2.18, "learning_rate": 2.1818181818181818e-05, "loss": 1.0059, "step": 24 }, { "epoch": 2.27, "learning_rate": 2.272727272727273e-05, "loss": 0.9702, "step": 25 }, { "epoch": 2.36, "learning_rate": 2.3636363636363637e-05, "loss": 0.9763, "step": 26 }, { "epoch": 2.45, "learning_rate": 2.4545454545454545e-05, "loss": 0.9261, "step": 27 }, { "epoch": 2.55, "learning_rate": 2.5454545454545454e-05, "loss": 0.8262, "step": 28 }, { "epoch": 2.64, "learning_rate": 2.636363636363636e-05, "loss": 0.9163, "step": 29 }, { "epoch": 2.73, "learning_rate": 2.7272727272727273e-05, "loss": 0.8509, "step": 30 }, { "epoch": 2.82, "learning_rate": 2.818181818181818e-05, "loss": 0.9219, "step": 31 }, { "epoch": 2.91, "learning_rate": 2.909090909090909e-05, "loss": 0.7396, "step": 32 }, { "epoch": 3.0, "learning_rate": 3e-05, "loss": 0.6746, "step": 33 }, { "epoch": 3.0, "eval_accuracy": 0.7395833333333334, "eval_loss": 0.7423216700553894, "eval_runtime": 1.6283, "eval_samples_per_second": 176.87, "eval_steps_per_second": 3.071, "step": 33 }, { "epoch": 3.09, "learning_rate": 3.090909090909091e-05, "loss": 0.8067, "step": 34 }, { "epoch": 3.18, "learning_rate": 3.181818181818182e-05, "loss": 0.7232, "step": 35 }, { "epoch": 3.27, "learning_rate": 3.272727272727273e-05, "loss": 0.7808, "step": 36 }, { "epoch": 3.36, "learning_rate": 3.3636363636363636e-05, "loss": 0.6977, "step": 37 }, { "epoch": 3.45, "learning_rate": 3.454545454545455e-05, "loss": 0.777, "step": 38 }, { "epoch": 3.55, "learning_rate": 3.545454545454546e-05, "loss": 0.6499, "step": 39 }, { "epoch": 3.64, "learning_rate": 3.6363636363636364e-05, "loss": 0.773, "step": 40 }, { "epoch": 3.73, "learning_rate": 3.7272727272727276e-05, "loss": 0.6632, "step": 41 }, { "epoch": 3.82, "learning_rate": 3.818181818181819e-05, "loss": 0.6182, "step": 42 }, { "epoch": 3.91, "learning_rate": 3.909090909090909e-05, "loss": 0.6828, "step": 43 }, { "epoch": 4.0, "learning_rate": 4e-05, "loss": 0.6364, "step": 44 }, { "epoch": 4.0, "eval_accuracy": 0.7569444444444444, "eval_loss": 0.6075392365455627, "eval_runtime": 1.563, "eval_samples_per_second": 184.265, "eval_steps_per_second": 3.199, "step": 44 }, { "epoch": 4.09, "learning_rate": 4.0909090909090915e-05, "loss": 0.5537, "step": 45 }, { "epoch": 4.18, "learning_rate": 4.181818181818182e-05, "loss": 0.5685, "step": 46 }, { "epoch": 4.27, "learning_rate": 4.2727272727272724e-05, "loss": 0.6091, "step": 47 }, { "epoch": 4.36, "learning_rate": 4.3636363636363636e-05, "loss": 0.617, "step": 48 }, { "epoch": 4.45, "learning_rate": 4.454545454545455e-05, "loss": 0.6308, "step": 49 }, { "epoch": 4.55, "learning_rate": 4.545454545454546e-05, "loss": 0.6292, "step": 50 }, { "epoch": 4.64, "learning_rate": 4.636363636363636e-05, "loss": 0.5864, "step": 51 }, { "epoch": 4.73, "learning_rate": 4.7272727272727275e-05, "loss": 0.6432, "step": 52 }, { "epoch": 4.82, "learning_rate": 4.8181818181818186e-05, "loss": 0.5012, "step": 53 }, { "epoch": 4.91, "learning_rate": 4.909090909090909e-05, "loss": 0.5659, "step": 54 }, { "epoch": 5.0, "learning_rate": 5e-05, "loss": 0.5425, "step": 55 }, { "epoch": 5.0, "eval_accuracy": 0.7951388888888888, "eval_loss": 0.5500215291976929, "eval_runtime": 1.5702, "eval_samples_per_second": 183.416, "eval_steps_per_second": 3.184, "step": 55 }, { "epoch": 5.09, "learning_rate": 4.98989898989899e-05, "loss": 0.5388, "step": 56 }, { "epoch": 5.18, "learning_rate": 4.97979797979798e-05, "loss": 0.474, "step": 57 }, { "epoch": 5.27, "learning_rate": 4.9696969696969694e-05, "loss": 0.5091, "step": 58 }, { "epoch": 5.36, "learning_rate": 4.9595959595959594e-05, "loss": 0.4634, "step": 59 }, { "epoch": 5.45, "learning_rate": 4.94949494949495e-05, "loss": 0.5044, "step": 60 }, { "epoch": 5.55, "learning_rate": 4.93939393939394e-05, "loss": 0.4715, "step": 61 }, { "epoch": 5.64, "learning_rate": 4.92929292929293e-05, "loss": 0.4705, "step": 62 }, { "epoch": 5.73, "learning_rate": 4.919191919191919e-05, "loss": 0.4706, "step": 63 }, { "epoch": 5.82, "learning_rate": 4.909090909090909e-05, "loss": 0.4945, "step": 64 }, { "epoch": 5.91, "learning_rate": 4.898989898989899e-05, "loss": 0.5611, "step": 65 }, { "epoch": 6.0, "learning_rate": 4.888888888888889e-05, "loss": 0.5001, "step": 66 }, { "epoch": 6.0, "eval_accuracy": 0.8159722222222222, "eval_loss": 0.4882892072200775, "eval_runtime": 1.6356, "eval_samples_per_second": 176.082, "eval_steps_per_second": 3.057, "step": 66 }, { "epoch": 6.09, "learning_rate": 4.878787878787879e-05, "loss": 0.3538, "step": 67 }, { "epoch": 6.18, "learning_rate": 4.868686868686869e-05, "loss": 0.4787, "step": 68 }, { "epoch": 6.27, "learning_rate": 4.858585858585859e-05, "loss": 0.4181, "step": 69 }, { "epoch": 6.36, "learning_rate": 4.848484848484849e-05, "loss": 0.451, "step": 70 }, { "epoch": 6.45, "learning_rate": 4.838383838383839e-05, "loss": 0.4455, "step": 71 }, { "epoch": 6.55, "learning_rate": 4.828282828282829e-05, "loss": 0.4275, "step": 72 }, { "epoch": 6.64, "learning_rate": 4.8181818181818186e-05, "loss": 0.4366, "step": 73 }, { "epoch": 6.73, "learning_rate": 4.808080808080808e-05, "loss": 0.4176, "step": 74 }, { "epoch": 6.82, "learning_rate": 4.797979797979798e-05, "loss": 0.4344, "step": 75 }, { "epoch": 6.91, "learning_rate": 4.787878787878788e-05, "loss": 0.4482, "step": 76 }, { "epoch": 7.0, "learning_rate": 4.7777777777777784e-05, "loss": 0.3519, "step": 77 }, { "epoch": 7.0, "eval_accuracy": 0.8263888888888888, "eval_loss": 0.4539467394351959, "eval_runtime": 1.5682, "eval_samples_per_second": 183.653, "eval_steps_per_second": 3.188, "step": 77 }, { "epoch": 7.09, "learning_rate": 4.7676767676767684e-05, "loss": 0.42, "step": 78 }, { "epoch": 7.18, "learning_rate": 4.7575757575757576e-05, "loss": 0.4017, "step": 79 }, { "epoch": 7.27, "learning_rate": 4.7474747474747476e-05, "loss": 0.3604, "step": 80 }, { "epoch": 7.36, "learning_rate": 4.7373737373737375e-05, "loss": 0.4075, "step": 81 }, { "epoch": 7.45, "learning_rate": 4.7272727272727275e-05, "loss": 0.399, "step": 82 }, { "epoch": 7.55, "learning_rate": 4.7171717171717174e-05, "loss": 0.335, "step": 83 }, { "epoch": 7.64, "learning_rate": 4.7070707070707074e-05, "loss": 0.297, "step": 84 }, { "epoch": 7.73, "learning_rate": 4.696969696969697e-05, "loss": 0.3727, "step": 85 }, { "epoch": 7.82, "learning_rate": 4.686868686868687e-05, "loss": 0.3886, "step": 86 }, { "epoch": 7.91, "learning_rate": 4.676767676767677e-05, "loss": 0.4117, "step": 87 }, { "epoch": 8.0, "learning_rate": 4.666666666666667e-05, "loss": 0.4421, "step": 88 }, { "epoch": 8.0, "eval_accuracy": 0.8194444444444444, "eval_loss": 0.4483272135257721, "eval_runtime": 1.5685, "eval_samples_per_second": 183.62, "eval_steps_per_second": 3.188, "step": 88 }, { "epoch": 8.09, "learning_rate": 4.656565656565657e-05, "loss": 0.334, "step": 89 }, { "epoch": 8.18, "learning_rate": 4.6464646464646464e-05, "loss": 0.3811, "step": 90 }, { "epoch": 8.27, "learning_rate": 4.636363636363636e-05, "loss": 0.3582, "step": 91 }, { "epoch": 8.36, "learning_rate": 4.626262626262626e-05, "loss": 0.381, "step": 92 }, { "epoch": 8.45, "learning_rate": 4.616161616161616e-05, "loss": 0.3449, "step": 93 }, { "epoch": 8.55, "learning_rate": 4.606060606060607e-05, "loss": 0.3465, "step": 94 }, { "epoch": 8.64, "learning_rate": 4.595959595959596e-05, "loss": 0.3676, "step": 95 }, { "epoch": 8.73, "learning_rate": 4.585858585858586e-05, "loss": 0.3616, "step": 96 }, { "epoch": 8.82, "learning_rate": 4.575757575757576e-05, "loss": 0.3392, "step": 97 }, { "epoch": 8.91, "learning_rate": 4.565656565656566e-05, "loss": 0.3374, "step": 98 }, { "epoch": 9.0, "learning_rate": 4.555555555555556e-05, "loss": 0.3207, "step": 99 }, { "epoch": 9.0, "eval_accuracy": 0.84375, "eval_loss": 0.3785366117954254, "eval_runtime": 1.5932, "eval_samples_per_second": 180.771, "eval_steps_per_second": 3.138, "step": 99 }, { "epoch": 9.09, "learning_rate": 4.545454545454546e-05, "loss": 0.3319, "step": 100 }, { "epoch": 9.18, "learning_rate": 4.535353535353535e-05, "loss": 0.3582, "step": 101 }, { "epoch": 9.27, "learning_rate": 4.525252525252526e-05, "loss": 0.2702, "step": 102 }, { "epoch": 9.36, "learning_rate": 4.515151515151516e-05, "loss": 0.3054, "step": 103 }, { "epoch": 9.45, "learning_rate": 4.5050505050505056e-05, "loss": 0.3041, "step": 104 }, { "epoch": 9.55, "learning_rate": 4.494949494949495e-05, "loss": 0.2398, "step": 105 }, { "epoch": 9.64, "learning_rate": 4.484848484848485e-05, "loss": 0.338, "step": 106 }, { "epoch": 9.73, "learning_rate": 4.474747474747475e-05, "loss": 0.2486, "step": 107 }, { "epoch": 9.82, "learning_rate": 4.464646464646465e-05, "loss": 0.279, "step": 108 }, { "epoch": 9.91, "learning_rate": 4.454545454545455e-05, "loss": 0.3784, "step": 109 }, { "epoch": 10.0, "learning_rate": 4.4444444444444447e-05, "loss": 0.3682, "step": 110 }, { "epoch": 10.0, "eval_accuracy": 0.8645833333333334, "eval_loss": 0.3384600281715393, "eval_runtime": 1.5672, "eval_samples_per_second": 183.763, "eval_steps_per_second": 3.19, "step": 110 }, { "epoch": 10.09, "learning_rate": 4.4343434343434346e-05, "loss": 0.3374, "step": 111 }, { "epoch": 10.18, "learning_rate": 4.4242424242424246e-05, "loss": 0.3114, "step": 112 }, { "epoch": 10.27, "learning_rate": 4.4141414141414145e-05, "loss": 0.3062, "step": 113 }, { "epoch": 10.36, "learning_rate": 4.4040404040404044e-05, "loss": 0.3199, "step": 114 }, { "epoch": 10.45, "learning_rate": 4.3939393939393944e-05, "loss": 0.2839, "step": 115 }, { "epoch": 10.55, "learning_rate": 4.383838383838384e-05, "loss": 0.2866, "step": 116 }, { "epoch": 10.64, "learning_rate": 4.3737373737373736e-05, "loss": 0.2628, "step": 117 }, { "epoch": 10.73, "learning_rate": 4.3636363636363636e-05, "loss": 0.3463, "step": 118 }, { "epoch": 10.82, "learning_rate": 4.3535353535353535e-05, "loss": 0.263, "step": 119 }, { "epoch": 10.91, "learning_rate": 4.343434343434344e-05, "loss": 0.3273, "step": 120 }, { "epoch": 11.0, "learning_rate": 4.3333333333333334e-05, "loss": 0.2642, "step": 121 }, { "epoch": 11.0, "eval_accuracy": 0.8402777777777778, "eval_loss": 0.38269615173339844, "eval_runtime": 1.622, "eval_samples_per_second": 177.56, "eval_steps_per_second": 3.083, "step": 121 }, { "epoch": 11.09, "learning_rate": 4.3232323232323234e-05, "loss": 0.2527, "step": 122 }, { "epoch": 11.18, "learning_rate": 4.313131313131313e-05, "loss": 0.2828, "step": 123 }, { "epoch": 11.27, "learning_rate": 4.303030303030303e-05, "loss": 0.2746, "step": 124 }, { "epoch": 11.36, "learning_rate": 4.292929292929293e-05, "loss": 0.2746, "step": 125 }, { "epoch": 11.45, "learning_rate": 4.282828282828283e-05, "loss": 0.2531, "step": 126 }, { "epoch": 11.55, "learning_rate": 4.2727272727272724e-05, "loss": 0.3099, "step": 127 }, { "epoch": 11.64, "learning_rate": 4.262626262626263e-05, "loss": 0.2967, "step": 128 }, { "epoch": 11.73, "learning_rate": 4.252525252525253e-05, "loss": 0.2549, "step": 129 }, { "epoch": 11.82, "learning_rate": 4.242424242424243e-05, "loss": 0.278, "step": 130 }, { "epoch": 11.91, "learning_rate": 4.232323232323233e-05, "loss": 0.2509, "step": 131 }, { "epoch": 12.0, "learning_rate": 4.222222222222222e-05, "loss": 0.3444, "step": 132 }, { "epoch": 12.0, "eval_accuracy": 0.8506944444444444, "eval_loss": 0.346247136592865, "eval_runtime": 1.5635, "eval_samples_per_second": 184.201, "eval_steps_per_second": 3.198, "step": 132 }, { "epoch": 12.09, "learning_rate": 4.212121212121212e-05, "loss": 0.2451, "step": 133 }, { "epoch": 12.18, "learning_rate": 4.202020202020202e-05, "loss": 0.2623, "step": 134 }, { "epoch": 12.27, "learning_rate": 4.191919191919192e-05, "loss": 0.226, "step": 135 }, { "epoch": 12.36, "learning_rate": 4.181818181818182e-05, "loss": 0.2158, "step": 136 }, { "epoch": 12.45, "learning_rate": 4.171717171717172e-05, "loss": 0.2471, "step": 137 }, { "epoch": 12.55, "learning_rate": 4.161616161616162e-05, "loss": 0.249, "step": 138 }, { "epoch": 12.64, "learning_rate": 4.151515151515152e-05, "loss": 0.2439, "step": 139 }, { "epoch": 12.73, "learning_rate": 4.141414141414142e-05, "loss": 0.284, "step": 140 }, { "epoch": 12.82, "learning_rate": 4.131313131313132e-05, "loss": 0.2701, "step": 141 }, { "epoch": 12.91, "learning_rate": 4.1212121212121216e-05, "loss": 0.3189, "step": 142 }, { "epoch": 13.0, "learning_rate": 4.111111111111111e-05, "loss": 0.2423, "step": 143 }, { "epoch": 13.0, "eval_accuracy": 0.8680555555555556, "eval_loss": 0.317039430141449, "eval_runtime": 1.5869, "eval_samples_per_second": 181.49, "eval_steps_per_second": 3.151, "step": 143 }, { "epoch": 13.09, "learning_rate": 4.101010101010101e-05, "loss": 0.2665, "step": 144 }, { "epoch": 13.18, "learning_rate": 4.0909090909090915e-05, "loss": 0.2167, "step": 145 }, { "epoch": 13.27, "learning_rate": 4.0808080808080814e-05, "loss": 0.2306, "step": 146 }, { "epoch": 13.36, "learning_rate": 4.070707070707071e-05, "loss": 0.2633, "step": 147 }, { "epoch": 13.45, "learning_rate": 4.0606060606060606e-05, "loss": 0.2385, "step": 148 }, { "epoch": 13.55, "learning_rate": 4.0505050505050506e-05, "loss": 0.3078, "step": 149 }, { "epoch": 13.64, "learning_rate": 4.0404040404040405e-05, "loss": 0.2579, "step": 150 }, { "epoch": 13.73, "learning_rate": 4.0303030303030305e-05, "loss": 0.2388, "step": 151 }, { "epoch": 13.82, "learning_rate": 4.0202020202020204e-05, "loss": 0.2028, "step": 152 }, { "epoch": 13.91, "learning_rate": 4.01010101010101e-05, "loss": 0.2058, "step": 153 }, { "epoch": 14.0, "learning_rate": 4e-05, "loss": 0.3168, "step": 154 }, { "epoch": 14.0, "eval_accuracy": 0.8715277777777778, "eval_loss": 0.3167605996131897, "eval_runtime": 1.5799, "eval_samples_per_second": 182.284, "eval_steps_per_second": 3.165, "step": 154 }, { "epoch": 14.09, "learning_rate": 3.98989898989899e-05, "loss": 0.1858, "step": 155 }, { "epoch": 14.18, "learning_rate": 3.97979797979798e-05, "loss": 0.251, "step": 156 }, { "epoch": 14.27, "learning_rate": 3.96969696969697e-05, "loss": 0.1862, "step": 157 }, { "epoch": 14.36, "learning_rate": 3.9595959595959594e-05, "loss": 0.2788, "step": 158 }, { "epoch": 14.45, "learning_rate": 3.9494949494949494e-05, "loss": 0.1804, "step": 159 }, { "epoch": 14.55, "learning_rate": 3.939393939393939e-05, "loss": 0.2121, "step": 160 }, { "epoch": 14.64, "learning_rate": 3.929292929292929e-05, "loss": 0.2029, "step": 161 }, { "epoch": 14.73, "learning_rate": 3.91919191919192e-05, "loss": 0.2308, "step": 162 }, { "epoch": 14.82, "learning_rate": 3.909090909090909e-05, "loss": 0.2396, "step": 163 }, { "epoch": 14.91, "learning_rate": 3.898989898989899e-05, "loss": 0.2234, "step": 164 }, { "epoch": 15.0, "learning_rate": 3.888888888888889e-05, "loss": 0.2781, "step": 165 }, { "epoch": 15.0, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.33227282762527466, "eval_runtime": 1.5652, "eval_samples_per_second": 184.003, "eval_steps_per_second": 3.194, "step": 165 }, { "epoch": 15.09, "learning_rate": 3.878787878787879e-05, "loss": 0.2058, "step": 166 }, { "epoch": 15.18, "learning_rate": 3.868686868686869e-05, "loss": 0.2155, "step": 167 }, { "epoch": 15.27, "learning_rate": 3.858585858585859e-05, "loss": 0.2097, "step": 168 }, { "epoch": 15.36, "learning_rate": 3.848484848484848e-05, "loss": 0.2237, "step": 169 }, { "epoch": 15.45, "learning_rate": 3.838383838383838e-05, "loss": 0.2715, "step": 170 }, { "epoch": 15.55, "learning_rate": 3.828282828282829e-05, "loss": 0.2293, "step": 171 }, { "epoch": 15.64, "learning_rate": 3.818181818181819e-05, "loss": 0.2285, "step": 172 }, { "epoch": 15.73, "learning_rate": 3.8080808080808087e-05, "loss": 0.2612, "step": 173 }, { "epoch": 15.82, "learning_rate": 3.797979797979798e-05, "loss": 0.196, "step": 174 }, { "epoch": 15.91, "learning_rate": 3.787878787878788e-05, "loss": 0.2298, "step": 175 }, { "epoch": 16.0, "learning_rate": 3.777777777777778e-05, "loss": 0.2411, "step": 176 }, { "epoch": 16.0, "eval_accuracy": 0.8715277777777778, "eval_loss": 0.31996577978134155, "eval_runtime": 1.5652, "eval_samples_per_second": 184.008, "eval_steps_per_second": 3.195, "step": 176 }, { "epoch": 16.09, "learning_rate": 3.767676767676768e-05, "loss": 0.1726, "step": 177 }, { "epoch": 16.18, "learning_rate": 3.757575757575758e-05, "loss": 0.2112, "step": 178 }, { "epoch": 16.27, "learning_rate": 3.747474747474748e-05, "loss": 0.1964, "step": 179 }, { "epoch": 16.36, "learning_rate": 3.7373737373737376e-05, "loss": 0.1967, "step": 180 }, { "epoch": 16.45, "learning_rate": 3.7272727272727276e-05, "loss": 0.2126, "step": 181 }, { "epoch": 16.55, "learning_rate": 3.7171717171717175e-05, "loss": 0.1554, "step": 182 }, { "epoch": 16.64, "learning_rate": 3.7070707070707075e-05, "loss": 0.1857, "step": 183 }, { "epoch": 16.73, "learning_rate": 3.6969696969696974e-05, "loss": 0.211, "step": 184 }, { "epoch": 16.82, "learning_rate": 3.686868686868687e-05, "loss": 0.2358, "step": 185 }, { "epoch": 16.91, "learning_rate": 3.6767676767676766e-05, "loss": 0.1626, "step": 186 }, { "epoch": 17.0, "learning_rate": 3.6666666666666666e-05, "loss": 0.2276, "step": 187 }, { "epoch": 17.0, "eval_accuracy": 0.875, "eval_loss": 0.32961538434028625, "eval_runtime": 1.6093, "eval_samples_per_second": 178.958, "eval_steps_per_second": 3.107, "step": 187 }, { "epoch": 17.09, "learning_rate": 3.656565656565657e-05, "loss": 0.1915, "step": 188 }, { "epoch": 17.18, "learning_rate": 3.6464646464646465e-05, "loss": 0.1826, "step": 189 }, { "epoch": 17.27, "learning_rate": 3.6363636363636364e-05, "loss": 0.2162, "step": 190 }, { "epoch": 17.36, "learning_rate": 3.6262626262626264e-05, "loss": 0.1755, "step": 191 }, { "epoch": 17.45, "learning_rate": 3.616161616161616e-05, "loss": 0.24, "step": 192 }, { "epoch": 17.55, "learning_rate": 3.606060606060606e-05, "loss": 0.1746, "step": 193 }, { "epoch": 17.64, "learning_rate": 3.595959595959596e-05, "loss": 0.1443, "step": 194 }, { "epoch": 17.73, "learning_rate": 3.5858585858585855e-05, "loss": 0.189, "step": 195 }, { "epoch": 17.82, "learning_rate": 3.575757575757576e-05, "loss": 0.1735, "step": 196 }, { "epoch": 17.91, "learning_rate": 3.565656565656566e-05, "loss": 0.1383, "step": 197 }, { "epoch": 18.0, "learning_rate": 3.555555555555556e-05, "loss": 0.192, "step": 198 }, { "epoch": 18.0, "eval_accuracy": 0.8854166666666666, "eval_loss": 0.3118571639060974, "eval_runtime": 1.5666, "eval_samples_per_second": 183.832, "eval_steps_per_second": 3.192, "step": 198 }, { "epoch": 18.09, "learning_rate": 3.545454545454546e-05, "loss": 0.2313, "step": 199 }, { "epoch": 18.18, "learning_rate": 3.535353535353535e-05, "loss": 0.2429, "step": 200 }, { "epoch": 18.27, "learning_rate": 3.525252525252525e-05, "loss": 0.1745, "step": 201 }, { "epoch": 18.36, "learning_rate": 3.515151515151515e-05, "loss": 0.189, "step": 202 }, { "epoch": 18.45, "learning_rate": 3.505050505050505e-05, "loss": 0.1607, "step": 203 }, { "epoch": 18.55, "learning_rate": 3.494949494949495e-05, "loss": 0.2268, "step": 204 }, { "epoch": 18.64, "learning_rate": 3.484848484848485e-05, "loss": 0.1263, "step": 205 }, { "epoch": 18.73, "learning_rate": 3.474747474747475e-05, "loss": 0.2372, "step": 206 }, { "epoch": 18.82, "learning_rate": 3.464646464646465e-05, "loss": 0.1604, "step": 207 }, { "epoch": 18.91, "learning_rate": 3.454545454545455e-05, "loss": 0.2622, "step": 208 }, { "epoch": 19.0, "learning_rate": 3.444444444444445e-05, "loss": 0.1612, "step": 209 }, { "epoch": 19.0, "eval_accuracy": 0.875, "eval_loss": 0.36473166942596436, "eval_runtime": 1.5912, "eval_samples_per_second": 180.99, "eval_steps_per_second": 3.142, "step": 209 }, { "epoch": 19.09, "learning_rate": 3.434343434343435e-05, "loss": 0.2007, "step": 210 }, { "epoch": 19.18, "learning_rate": 3.424242424242424e-05, "loss": 0.1945, "step": 211 }, { "epoch": 19.27, "learning_rate": 3.414141414141414e-05, "loss": 0.1654, "step": 212 }, { "epoch": 19.36, "learning_rate": 3.4040404040404045e-05, "loss": 0.141, "step": 213 }, { "epoch": 19.45, "learning_rate": 3.3939393939393945e-05, "loss": 0.2012, "step": 214 }, { "epoch": 19.55, "learning_rate": 3.3838383838383844e-05, "loss": 0.1692, "step": 215 }, { "epoch": 19.64, "learning_rate": 3.373737373737374e-05, "loss": 0.163, "step": 216 }, { "epoch": 19.73, "learning_rate": 3.3636363636363636e-05, "loss": 0.2155, "step": 217 }, { "epoch": 19.82, "learning_rate": 3.3535353535353536e-05, "loss": 0.1732, "step": 218 }, { "epoch": 19.91, "learning_rate": 3.3434343434343435e-05, "loss": 0.1395, "step": 219 }, { "epoch": 20.0, "learning_rate": 3.3333333333333335e-05, "loss": 0.1084, "step": 220 }, { "epoch": 20.0, "eval_accuracy": 0.8993055555555556, "eval_loss": 0.2640966773033142, "eval_runtime": 1.5715, "eval_samples_per_second": 183.268, "eval_steps_per_second": 3.182, "step": 220 }, { "epoch": 20.09, "learning_rate": 3.3232323232323234e-05, "loss": 0.138, "step": 221 }, { "epoch": 20.18, "learning_rate": 3.3131313131313134e-05, "loss": 0.1625, "step": 222 }, { "epoch": 20.27, "learning_rate": 3.303030303030303e-05, "loss": 0.1838, "step": 223 }, { "epoch": 20.36, "learning_rate": 3.292929292929293e-05, "loss": 0.1558, "step": 224 }, { "epoch": 20.45, "learning_rate": 3.282828282828283e-05, "loss": 0.2134, "step": 225 }, { "epoch": 20.55, "learning_rate": 3.272727272727273e-05, "loss": 0.1694, "step": 226 }, { "epoch": 20.64, "learning_rate": 3.2626262626262624e-05, "loss": 0.2243, "step": 227 }, { "epoch": 20.73, "learning_rate": 3.2525252525252524e-05, "loss": 0.219, "step": 228 }, { "epoch": 20.82, "learning_rate": 3.2424242424242423e-05, "loss": 0.1761, "step": 229 }, { "epoch": 20.91, "learning_rate": 3.232323232323233e-05, "loss": 0.1231, "step": 230 }, { "epoch": 21.0, "learning_rate": 3.222222222222223e-05, "loss": 0.2099, "step": 231 }, { "epoch": 21.0, "eval_accuracy": 0.8958333333333334, "eval_loss": 0.2806669771671295, "eval_runtime": 1.5627, "eval_samples_per_second": 184.299, "eval_steps_per_second": 3.2, "step": 231 }, { "epoch": 21.09, "learning_rate": 3.212121212121212e-05, "loss": 0.1831, "step": 232 }, { "epoch": 21.18, "learning_rate": 3.202020202020202e-05, "loss": 0.123, "step": 233 }, { "epoch": 21.27, "learning_rate": 3.191919191919192e-05, "loss": 0.168, "step": 234 }, { "epoch": 21.36, "learning_rate": 3.181818181818182e-05, "loss": 0.1516, "step": 235 }, { "epoch": 21.45, "learning_rate": 3.171717171717172e-05, "loss": 0.1424, "step": 236 }, { "epoch": 21.55, "learning_rate": 3.161616161616161e-05, "loss": 0.1368, "step": 237 }, { "epoch": 21.64, "learning_rate": 3.151515151515151e-05, "loss": 0.234, "step": 238 }, { "epoch": 21.73, "learning_rate": 3.141414141414142e-05, "loss": 0.1563, "step": 239 }, { "epoch": 21.82, "learning_rate": 3.131313131313132e-05, "loss": 0.1288, "step": 240 }, { "epoch": 21.91, "learning_rate": 3.121212121212122e-05, "loss": 0.1547, "step": 241 }, { "epoch": 22.0, "learning_rate": 3.111111111111111e-05, "loss": 0.1666, "step": 242 }, { "epoch": 22.0, "eval_accuracy": 0.9097222222222222, "eval_loss": 0.25945279002189636, "eval_runtime": 1.5674, "eval_samples_per_second": 183.74, "eval_steps_per_second": 3.19, "step": 242 }, { "epoch": 22.09, "learning_rate": 3.101010101010101e-05, "loss": 0.1237, "step": 243 }, { "epoch": 22.18, "learning_rate": 3.090909090909091e-05, "loss": 0.1206, "step": 244 }, { "epoch": 22.27, "learning_rate": 3.080808080808081e-05, "loss": 0.1572, "step": 245 }, { "epoch": 22.36, "learning_rate": 3.070707070707071e-05, "loss": 0.2006, "step": 246 }, { "epoch": 22.45, "learning_rate": 3.060606060606061e-05, "loss": 0.191, "step": 247 }, { "epoch": 22.55, "learning_rate": 3.050505050505051e-05, "loss": 0.1191, "step": 248 }, { "epoch": 22.64, "learning_rate": 3.0404040404040406e-05, "loss": 0.1462, "step": 249 }, { "epoch": 22.73, "learning_rate": 3.0303030303030306e-05, "loss": 0.1679, "step": 250 }, { "epoch": 22.82, "learning_rate": 3.0202020202020205e-05, "loss": 0.2127, "step": 251 }, { "epoch": 22.91, "learning_rate": 3.01010101010101e-05, "loss": 0.1359, "step": 252 }, { "epoch": 23.0, "learning_rate": 3e-05, "loss": 0.1355, "step": 253 }, { "epoch": 23.0, "eval_accuracy": 0.8923611111111112, "eval_loss": 0.2734811007976532, "eval_runtime": 1.5633, "eval_samples_per_second": 184.23, "eval_steps_per_second": 3.198, "step": 253 }, { "epoch": 23.09, "learning_rate": 2.98989898989899e-05, "loss": 0.1139, "step": 254 }, { "epoch": 23.18, "learning_rate": 2.9797979797979796e-05, "loss": 0.1299, "step": 255 }, { "epoch": 23.27, "learning_rate": 2.96969696969697e-05, "loss": 0.1732, "step": 256 }, { "epoch": 23.36, "learning_rate": 2.95959595959596e-05, "loss": 0.1417, "step": 257 }, { "epoch": 23.45, "learning_rate": 2.9494949494949498e-05, "loss": 0.1588, "step": 258 }, { "epoch": 23.55, "learning_rate": 2.9393939393939394e-05, "loss": 0.1159, "step": 259 }, { "epoch": 23.64, "learning_rate": 2.9292929292929294e-05, "loss": 0.1636, "step": 260 }, { "epoch": 23.73, "learning_rate": 2.9191919191919193e-05, "loss": 0.1375, "step": 261 }, { "epoch": 23.82, "learning_rate": 2.909090909090909e-05, "loss": 0.128, "step": 262 }, { "epoch": 23.91, "learning_rate": 2.898989898989899e-05, "loss": 0.1347, "step": 263 }, { "epoch": 24.0, "learning_rate": 2.8888888888888888e-05, "loss": 0.1165, "step": 264 }, { "epoch": 24.0, "eval_accuracy": 0.8784722222222222, "eval_loss": 0.3238440155982971, "eval_runtime": 1.6248, "eval_samples_per_second": 177.253, "eval_steps_per_second": 3.077, "step": 264 }, { "epoch": 24.09, "learning_rate": 2.878787878787879e-05, "loss": 0.108, "step": 265 }, { "epoch": 24.18, "learning_rate": 2.868686868686869e-05, "loss": 0.1276, "step": 266 }, { "epoch": 24.27, "learning_rate": 2.8585858585858587e-05, "loss": 0.1893, "step": 267 }, { "epoch": 24.36, "learning_rate": 2.8484848484848486e-05, "loss": 0.1291, "step": 268 }, { "epoch": 24.45, "learning_rate": 2.8383838383838386e-05, "loss": 0.1547, "step": 269 }, { "epoch": 24.55, "learning_rate": 2.8282828282828282e-05, "loss": 0.1697, "step": 270 }, { "epoch": 24.64, "learning_rate": 2.818181818181818e-05, "loss": 0.1465, "step": 271 }, { "epoch": 24.73, "learning_rate": 2.808080808080808e-05, "loss": 0.1422, "step": 272 }, { "epoch": 24.82, "learning_rate": 2.7979797979797984e-05, "loss": 0.165, "step": 273 }, { "epoch": 24.91, "learning_rate": 2.7878787878787883e-05, "loss": 0.1262, "step": 274 }, { "epoch": 25.0, "learning_rate": 2.777777777777778e-05, "loss": 0.112, "step": 275 }, { "epoch": 25.0, "eval_accuracy": 0.8888888888888888, "eval_loss": 0.3065586984157562, "eval_runtime": 1.5997, "eval_samples_per_second": 180.037, "eval_steps_per_second": 3.126, "step": 275 }, { "epoch": 25.09, "learning_rate": 2.767676767676768e-05, "loss": 0.1132, "step": 276 }, { "epoch": 25.18, "learning_rate": 2.7575757575757578e-05, "loss": 0.1252, "step": 277 }, { "epoch": 25.27, "learning_rate": 2.7474747474747474e-05, "loss": 0.1023, "step": 278 }, { "epoch": 25.36, "learning_rate": 2.7373737373737374e-05, "loss": 0.1254, "step": 279 }, { "epoch": 25.45, "learning_rate": 2.7272727272727273e-05, "loss": 0.1575, "step": 280 }, { "epoch": 25.55, "learning_rate": 2.717171717171717e-05, "loss": 0.1072, "step": 281 }, { "epoch": 25.64, "learning_rate": 2.7070707070707075e-05, "loss": 0.135, "step": 282 }, { "epoch": 25.73, "learning_rate": 2.696969696969697e-05, "loss": 0.1851, "step": 283 }, { "epoch": 25.82, "learning_rate": 2.686868686868687e-05, "loss": 0.1454, "step": 284 }, { "epoch": 25.91, "learning_rate": 2.676767676767677e-05, "loss": 0.1629, "step": 285 }, { "epoch": 26.0, "learning_rate": 2.6666666666666667e-05, "loss": 0.1191, "step": 286 }, { "epoch": 26.0, "eval_accuracy": 0.90625, "eval_loss": 0.24274842441082, "eval_runtime": 1.5782, "eval_samples_per_second": 182.491, "eval_steps_per_second": 3.168, "step": 286 }, { "epoch": 26.09, "learning_rate": 2.6565656565656566e-05, "loss": 0.1726, "step": 287 }, { "epoch": 26.18, "learning_rate": 2.6464646464646466e-05, "loss": 0.1017, "step": 288 }, { "epoch": 26.27, "learning_rate": 2.636363636363636e-05, "loss": 0.0857, "step": 289 }, { "epoch": 26.36, "learning_rate": 2.6262626262626268e-05, "loss": 0.1463, "step": 290 }, { "epoch": 26.45, "learning_rate": 2.6161616161616164e-05, "loss": 0.1513, "step": 291 }, { "epoch": 26.55, "learning_rate": 2.6060606060606063e-05, "loss": 0.0975, "step": 292 }, { "epoch": 26.64, "learning_rate": 2.5959595959595963e-05, "loss": 0.1405, "step": 293 }, { "epoch": 26.73, "learning_rate": 2.585858585858586e-05, "loss": 0.1236, "step": 294 }, { "epoch": 26.82, "learning_rate": 2.575757575757576e-05, "loss": 0.1641, "step": 295 }, { "epoch": 26.91, "learning_rate": 2.5656565656565658e-05, "loss": 0.1164, "step": 296 }, { "epoch": 27.0, "learning_rate": 2.5555555555555554e-05, "loss": 0.1293, "step": 297 }, { "epoch": 27.0, "eval_accuracy": 0.9201388888888888, "eval_loss": 0.25357797741889954, "eval_runtime": 1.574, "eval_samples_per_second": 182.979, "eval_steps_per_second": 3.177, "step": 297 }, { "epoch": 27.09, "learning_rate": 2.5454545454545454e-05, "loss": 0.1206, "step": 298 }, { "epoch": 27.18, "learning_rate": 2.5353535353535356e-05, "loss": 0.1533, "step": 299 }, { "epoch": 27.27, "learning_rate": 2.5252525252525256e-05, "loss": 0.1269, "step": 300 }, { "epoch": 27.36, "learning_rate": 2.5151515151515155e-05, "loss": 0.1208, "step": 301 }, { "epoch": 27.45, "learning_rate": 2.505050505050505e-05, "loss": 0.1069, "step": 302 }, { "epoch": 27.55, "learning_rate": 2.494949494949495e-05, "loss": 0.1034, "step": 303 }, { "epoch": 27.64, "learning_rate": 2.4848484848484847e-05, "loss": 0.087, "step": 304 }, { "epoch": 27.73, "learning_rate": 2.474747474747475e-05, "loss": 0.0929, "step": 305 }, { "epoch": 27.82, "learning_rate": 2.464646464646465e-05, "loss": 0.1334, "step": 306 }, { "epoch": 27.91, "learning_rate": 2.4545454545454545e-05, "loss": 0.112, "step": 307 }, { "epoch": 28.0, "learning_rate": 2.4444444444444445e-05, "loss": 0.2932, "step": 308 }, { "epoch": 28.0, "eval_accuracy": 0.8923611111111112, "eval_loss": 0.27067822217941284, "eval_runtime": 1.5637, "eval_samples_per_second": 184.181, "eval_steps_per_second": 3.198, "step": 308 }, { "epoch": 28.09, "learning_rate": 2.4343434343434344e-05, "loss": 0.1466, "step": 309 }, { "epoch": 28.18, "learning_rate": 2.4242424242424244e-05, "loss": 0.1069, "step": 310 }, { "epoch": 28.27, "learning_rate": 2.4141414141414143e-05, "loss": 0.0743, "step": 311 }, { "epoch": 28.36, "learning_rate": 2.404040404040404e-05, "loss": 0.0703, "step": 312 }, { "epoch": 28.45, "learning_rate": 2.393939393939394e-05, "loss": 0.1335, "step": 313 }, { "epoch": 28.55, "learning_rate": 2.3838383838383842e-05, "loss": 0.1313, "step": 314 }, { "epoch": 28.64, "learning_rate": 2.3737373737373738e-05, "loss": 0.1224, "step": 315 }, { "epoch": 28.73, "learning_rate": 2.3636363636363637e-05, "loss": 0.1197, "step": 316 }, { "epoch": 28.82, "learning_rate": 2.3535353535353537e-05, "loss": 0.1188, "step": 317 }, { "epoch": 28.91, "learning_rate": 2.3434343434343436e-05, "loss": 0.1575, "step": 318 }, { "epoch": 29.0, "learning_rate": 2.3333333333333336e-05, "loss": 0.0918, "step": 319 }, { "epoch": 29.0, "eval_accuracy": 0.8923611111111112, "eval_loss": 0.26875555515289307, "eval_runtime": 1.5673, "eval_samples_per_second": 183.752, "eval_steps_per_second": 3.19, "step": 319 }, { "epoch": 29.09, "learning_rate": 2.3232323232323232e-05, "loss": 0.0987, "step": 320 }, { "epoch": 29.18, "learning_rate": 2.313131313131313e-05, "loss": 0.1075, "step": 321 }, { "epoch": 29.27, "learning_rate": 2.3030303030303034e-05, "loss": 0.1035, "step": 322 }, { "epoch": 29.36, "learning_rate": 2.292929292929293e-05, "loss": 0.1098, "step": 323 }, { "epoch": 29.45, "learning_rate": 2.282828282828283e-05, "loss": 0.0967, "step": 324 }, { "epoch": 29.55, "learning_rate": 2.272727272727273e-05, "loss": 0.1303, "step": 325 }, { "epoch": 29.64, "learning_rate": 2.262626262626263e-05, "loss": 0.1276, "step": 326 }, { "epoch": 29.73, "learning_rate": 2.2525252525252528e-05, "loss": 0.1105, "step": 327 }, { "epoch": 29.82, "learning_rate": 2.2424242424242424e-05, "loss": 0.1067, "step": 328 }, { "epoch": 29.91, "learning_rate": 2.2323232323232324e-05, "loss": 0.1119, "step": 329 }, { "epoch": 30.0, "learning_rate": 2.2222222222222223e-05, "loss": 0.1529, "step": 330 }, { "epoch": 30.0, "eval_accuracy": 0.8888888888888888, "eval_loss": 0.2714914083480835, "eval_runtime": 1.5703, "eval_samples_per_second": 183.402, "eval_steps_per_second": 3.184, "step": 330 }, { "epoch": 30.09, "learning_rate": 2.2121212121212123e-05, "loss": 0.1097, "step": 331 }, { "epoch": 30.18, "learning_rate": 2.2020202020202022e-05, "loss": 0.0645, "step": 332 }, { "epoch": 30.27, "learning_rate": 2.191919191919192e-05, "loss": 0.098, "step": 333 }, { "epoch": 30.36, "learning_rate": 2.1818181818181818e-05, "loss": 0.1134, "step": 334 }, { "epoch": 30.45, "learning_rate": 2.171717171717172e-05, "loss": 0.1161, "step": 335 }, { "epoch": 30.55, "learning_rate": 2.1616161616161617e-05, "loss": 0.1452, "step": 336 }, { "epoch": 30.64, "learning_rate": 2.1515151515151516e-05, "loss": 0.0576, "step": 337 }, { "epoch": 30.73, "learning_rate": 2.1414141414141416e-05, "loss": 0.1293, "step": 338 }, { "epoch": 30.82, "learning_rate": 2.1313131313131315e-05, "loss": 0.1267, "step": 339 }, { "epoch": 30.91, "learning_rate": 2.1212121212121215e-05, "loss": 0.1049, "step": 340 }, { "epoch": 31.0, "learning_rate": 2.111111111111111e-05, "loss": 0.227, "step": 341 }, { "epoch": 31.0, "eval_accuracy": 0.9027777777777778, "eval_loss": 0.2664012014865875, "eval_runtime": 1.5768, "eval_samples_per_second": 182.651, "eval_steps_per_second": 3.171, "step": 341 }, { "epoch": 31.09, "learning_rate": 2.101010101010101e-05, "loss": 0.1239, "step": 342 }, { "epoch": 31.18, "learning_rate": 2.090909090909091e-05, "loss": 0.0908, "step": 343 }, { "epoch": 31.27, "learning_rate": 2.080808080808081e-05, "loss": 0.1132, "step": 344 }, { "epoch": 31.36, "learning_rate": 2.070707070707071e-05, "loss": 0.0946, "step": 345 }, { "epoch": 31.45, "learning_rate": 2.0606060606060608e-05, "loss": 0.093, "step": 346 }, { "epoch": 31.55, "learning_rate": 2.0505050505050504e-05, "loss": 0.1207, "step": 347 }, { "epoch": 31.64, "learning_rate": 2.0404040404040407e-05, "loss": 0.111, "step": 348 }, { "epoch": 31.73, "learning_rate": 2.0303030303030303e-05, "loss": 0.1069, "step": 349 }, { "epoch": 31.82, "learning_rate": 2.0202020202020203e-05, "loss": 0.0982, "step": 350 }, { "epoch": 31.91, "learning_rate": 2.0101010101010102e-05, "loss": 0.158, "step": 351 }, { "epoch": 32.0, "learning_rate": 2e-05, "loss": 0.1044, "step": 352 }, { "epoch": 32.0, "eval_accuracy": 0.8993055555555556, "eval_loss": 0.2808922529220581, "eval_runtime": 1.5749, "eval_samples_per_second": 182.87, "eval_steps_per_second": 3.175, "step": 352 }, { "epoch": 32.09, "learning_rate": 1.98989898989899e-05, "loss": 0.0917, "step": 353 }, { "epoch": 32.18, "learning_rate": 1.9797979797979797e-05, "loss": 0.0979, "step": 354 }, { "epoch": 32.27, "learning_rate": 1.9696969696969697e-05, "loss": 0.111, "step": 355 }, { "epoch": 32.36, "learning_rate": 1.95959595959596e-05, "loss": 0.121, "step": 356 }, { "epoch": 32.45, "learning_rate": 1.9494949494949496e-05, "loss": 0.1075, "step": 357 }, { "epoch": 32.55, "learning_rate": 1.9393939393939395e-05, "loss": 0.1152, "step": 358 }, { "epoch": 32.64, "learning_rate": 1.9292929292929295e-05, "loss": 0.0991, "step": 359 }, { "epoch": 32.73, "learning_rate": 1.919191919191919e-05, "loss": 0.1117, "step": 360 }, { "epoch": 32.82, "learning_rate": 1.9090909090909094e-05, "loss": 0.1275, "step": 361 }, { "epoch": 32.91, "learning_rate": 1.898989898989899e-05, "loss": 0.1033, "step": 362 }, { "epoch": 33.0, "learning_rate": 1.888888888888889e-05, "loss": 0.0894, "step": 363 }, { "epoch": 33.0, "eval_accuracy": 0.8923611111111112, "eval_loss": 0.2863243818283081, "eval_runtime": 1.5629, "eval_samples_per_second": 184.271, "eval_steps_per_second": 3.199, "step": 363 }, { "epoch": 33.09, "learning_rate": 1.878787878787879e-05, "loss": 0.0979, "step": 364 }, { "epoch": 33.18, "learning_rate": 1.8686868686868688e-05, "loss": 0.076, "step": 365 }, { "epoch": 33.27, "learning_rate": 1.8585858585858588e-05, "loss": 0.0744, "step": 366 }, { "epoch": 33.36, "learning_rate": 1.8484848484848487e-05, "loss": 0.0952, "step": 367 }, { "epoch": 33.45, "learning_rate": 1.8383838383838383e-05, "loss": 0.1057, "step": 368 }, { "epoch": 33.55, "learning_rate": 1.8282828282828286e-05, "loss": 0.0713, "step": 369 }, { "epoch": 33.64, "learning_rate": 1.8181818181818182e-05, "loss": 0.0957, "step": 370 }, { "epoch": 33.73, "learning_rate": 1.808080808080808e-05, "loss": 0.0771, "step": 371 }, { "epoch": 33.82, "learning_rate": 1.797979797979798e-05, "loss": 0.111, "step": 372 }, { "epoch": 33.91, "learning_rate": 1.787878787878788e-05, "loss": 0.1235, "step": 373 }, { "epoch": 34.0, "learning_rate": 1.777777777777778e-05, "loss": 0.0566, "step": 374 }, { "epoch": 34.0, "eval_accuracy": 0.9201388888888888, "eval_loss": 0.24742072820663452, "eval_runtime": 1.5761, "eval_samples_per_second": 182.728, "eval_steps_per_second": 3.172, "step": 374 }, { "epoch": 34.09, "learning_rate": 1.7676767676767676e-05, "loss": 0.0865, "step": 375 }, { "epoch": 34.18, "learning_rate": 1.7575757575757576e-05, "loss": 0.1036, "step": 376 }, { "epoch": 34.27, "learning_rate": 1.7474747474747475e-05, "loss": 0.0638, "step": 377 }, { "epoch": 34.36, "learning_rate": 1.7373737373737375e-05, "loss": 0.0975, "step": 378 }, { "epoch": 34.45, "learning_rate": 1.7272727272727274e-05, "loss": 0.0933, "step": 379 }, { "epoch": 34.55, "learning_rate": 1.7171717171717173e-05, "loss": 0.1113, "step": 380 }, { "epoch": 34.64, "learning_rate": 1.707070707070707e-05, "loss": 0.0718, "step": 381 }, { "epoch": 34.73, "learning_rate": 1.6969696969696972e-05, "loss": 0.1104, "step": 382 }, { "epoch": 34.82, "learning_rate": 1.686868686868687e-05, "loss": 0.0873, "step": 383 }, { "epoch": 34.91, "learning_rate": 1.6767676767676768e-05, "loss": 0.1197, "step": 384 }, { "epoch": 35.0, "learning_rate": 1.6666666666666667e-05, "loss": 0.0915, "step": 385 }, { "epoch": 35.0, "eval_accuracy": 0.9097222222222222, "eval_loss": 0.2427683174610138, "eval_runtime": 1.5702, "eval_samples_per_second": 183.413, "eval_steps_per_second": 3.184, "step": 385 }, { "epoch": 35.09, "learning_rate": 1.6565656565656567e-05, "loss": 0.1235, "step": 386 }, { "epoch": 35.18, "learning_rate": 1.6464646464646466e-05, "loss": 0.0871, "step": 387 }, { "epoch": 35.27, "learning_rate": 1.6363636363636366e-05, "loss": 0.0765, "step": 388 }, { "epoch": 35.36, "learning_rate": 1.6262626262626262e-05, "loss": 0.0927, "step": 389 }, { "epoch": 35.45, "learning_rate": 1.6161616161616165e-05, "loss": 0.1012, "step": 390 }, { "epoch": 35.55, "learning_rate": 1.606060606060606e-05, "loss": 0.084, "step": 391 }, { "epoch": 35.64, "learning_rate": 1.595959595959596e-05, "loss": 0.0936, "step": 392 }, { "epoch": 35.73, "learning_rate": 1.585858585858586e-05, "loss": 0.128, "step": 393 }, { "epoch": 35.82, "learning_rate": 1.5757575757575756e-05, "loss": 0.0969, "step": 394 }, { "epoch": 35.91, "learning_rate": 1.565656565656566e-05, "loss": 0.0794, "step": 395 }, { "epoch": 36.0, "learning_rate": 1.5555555555555555e-05, "loss": 0.1136, "step": 396 }, { "epoch": 36.0, "eval_accuracy": 0.9097222222222222, "eval_loss": 0.25445806980133057, "eval_runtime": 1.5637, "eval_samples_per_second": 184.175, "eval_steps_per_second": 3.197, "step": 396 }, { "epoch": 36.09, "learning_rate": 1.5454545454545454e-05, "loss": 0.0435, "step": 397 }, { "epoch": 36.18, "learning_rate": 1.5353535353535354e-05, "loss": 0.0474, "step": 398 }, { "epoch": 36.27, "learning_rate": 1.5252525252525255e-05, "loss": 0.1082, "step": 399 }, { "epoch": 36.36, "learning_rate": 1.5151515151515153e-05, "loss": 0.0613, "step": 400 }, { "epoch": 36.45, "learning_rate": 1.505050505050505e-05, "loss": 0.0681, "step": 401 }, { "epoch": 36.55, "learning_rate": 1.494949494949495e-05, "loss": 0.0777, "step": 402 }, { "epoch": 36.64, "learning_rate": 1.484848484848485e-05, "loss": 0.0706, "step": 403 }, { "epoch": 36.73, "learning_rate": 1.4747474747474749e-05, "loss": 0.0696, "step": 404 }, { "epoch": 36.82, "learning_rate": 1.4646464646464647e-05, "loss": 0.0689, "step": 405 }, { "epoch": 36.91, "learning_rate": 1.4545454545454545e-05, "loss": 0.0978, "step": 406 }, { "epoch": 37.0, "learning_rate": 1.4444444444444444e-05, "loss": 0.0947, "step": 407 }, { "epoch": 37.0, "eval_accuracy": 0.9097222222222222, "eval_loss": 0.2598806619644165, "eval_runtime": 1.5727, "eval_samples_per_second": 183.13, "eval_steps_per_second": 3.179, "step": 407 }, { "epoch": 37.09, "learning_rate": 1.4343434343434345e-05, "loss": 0.05, "step": 408 }, { "epoch": 37.18, "learning_rate": 1.4242424242424243e-05, "loss": 0.1237, "step": 409 }, { "epoch": 37.27, "learning_rate": 1.4141414141414141e-05, "loss": 0.0491, "step": 410 }, { "epoch": 37.36, "learning_rate": 1.404040404040404e-05, "loss": 0.0882, "step": 411 }, { "epoch": 37.45, "learning_rate": 1.3939393939393942e-05, "loss": 0.0765, "step": 412 }, { "epoch": 37.55, "learning_rate": 1.383838383838384e-05, "loss": 0.073, "step": 413 }, { "epoch": 37.64, "learning_rate": 1.3737373737373737e-05, "loss": 0.0728, "step": 414 }, { "epoch": 37.73, "learning_rate": 1.3636363636363637e-05, "loss": 0.075, "step": 415 }, { "epoch": 37.82, "learning_rate": 1.3535353535353538e-05, "loss": 0.0682, "step": 416 }, { "epoch": 37.91, "learning_rate": 1.3434343434343436e-05, "loss": 0.0675, "step": 417 }, { "epoch": 38.0, "learning_rate": 1.3333333333333333e-05, "loss": 0.1012, "step": 418 }, { "epoch": 38.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.24541756510734558, "eval_runtime": 1.5701, "eval_samples_per_second": 183.432, "eval_steps_per_second": 3.185, "step": 418 }, { "epoch": 38.09, "learning_rate": 1.3232323232323233e-05, "loss": 0.0642, "step": 419 }, { "epoch": 38.18, "learning_rate": 1.3131313131313134e-05, "loss": 0.0701, "step": 420 }, { "epoch": 38.27, "learning_rate": 1.3030303030303032e-05, "loss": 0.0675, "step": 421 }, { "epoch": 38.36, "learning_rate": 1.292929292929293e-05, "loss": 0.0763, "step": 422 }, { "epoch": 38.45, "learning_rate": 1.2828282828282829e-05, "loss": 0.0664, "step": 423 }, { "epoch": 38.55, "learning_rate": 1.2727272727272727e-05, "loss": 0.1222, "step": 424 }, { "epoch": 38.64, "learning_rate": 1.2626262626262628e-05, "loss": 0.072, "step": 425 }, { "epoch": 38.73, "learning_rate": 1.2525252525252526e-05, "loss": 0.0877, "step": 426 }, { "epoch": 38.82, "learning_rate": 1.2424242424242424e-05, "loss": 0.1098, "step": 427 }, { "epoch": 38.91, "learning_rate": 1.2323232323232325e-05, "loss": 0.0774, "step": 428 }, { "epoch": 39.0, "learning_rate": 1.2222222222222222e-05, "loss": 0.0465, "step": 429 }, { "epoch": 39.0, "eval_accuracy": 0.9201388888888888, "eval_loss": 0.24348551034927368, "eval_runtime": 1.5838, "eval_samples_per_second": 181.842, "eval_steps_per_second": 3.157, "step": 429 }, { "epoch": 39.09, "learning_rate": 1.2121212121212122e-05, "loss": 0.0474, "step": 430 }, { "epoch": 39.18, "learning_rate": 1.202020202020202e-05, "loss": 0.0535, "step": 431 }, { "epoch": 39.27, "learning_rate": 1.1919191919191921e-05, "loss": 0.1069, "step": 432 }, { "epoch": 39.36, "learning_rate": 1.1818181818181819e-05, "loss": 0.0872, "step": 433 }, { "epoch": 39.45, "learning_rate": 1.1717171717171718e-05, "loss": 0.0852, "step": 434 }, { "epoch": 39.55, "learning_rate": 1.1616161616161616e-05, "loss": 0.0503, "step": 435 }, { "epoch": 39.64, "learning_rate": 1.1515151515151517e-05, "loss": 0.0746, "step": 436 }, { "epoch": 39.73, "learning_rate": 1.1414141414141415e-05, "loss": 0.0866, "step": 437 }, { "epoch": 39.82, "learning_rate": 1.1313131313131314e-05, "loss": 0.1031, "step": 438 }, { "epoch": 39.91, "learning_rate": 1.1212121212121212e-05, "loss": 0.075, "step": 439 }, { "epoch": 40.0, "learning_rate": 1.1111111111111112e-05, "loss": 0.0299, "step": 440 }, { "epoch": 40.0, "eval_accuracy": 0.90625, "eval_loss": 0.25319477915763855, "eval_runtime": 1.5767, "eval_samples_per_second": 182.661, "eval_steps_per_second": 3.171, "step": 440 }, { "epoch": 40.09, "learning_rate": 1.1010101010101011e-05, "loss": 0.0958, "step": 441 }, { "epoch": 40.18, "learning_rate": 1.0909090909090909e-05, "loss": 0.0964, "step": 442 }, { "epoch": 40.27, "learning_rate": 1.0808080808080808e-05, "loss": 0.0524, "step": 443 }, { "epoch": 40.36, "learning_rate": 1.0707070707070708e-05, "loss": 0.0487, "step": 444 }, { "epoch": 40.45, "learning_rate": 1.0606060606060607e-05, "loss": 0.0801, "step": 445 }, { "epoch": 40.55, "learning_rate": 1.0505050505050505e-05, "loss": 0.1102, "step": 446 }, { "epoch": 40.64, "learning_rate": 1.0404040404040405e-05, "loss": 0.0523, "step": 447 }, { "epoch": 40.73, "learning_rate": 1.0303030303030304e-05, "loss": 0.0825, "step": 448 }, { "epoch": 40.82, "learning_rate": 1.0202020202020204e-05, "loss": 0.0664, "step": 449 }, { "epoch": 40.91, "learning_rate": 1.0101010101010101e-05, "loss": 0.0621, "step": 450 }, { "epoch": 41.0, "learning_rate": 1e-05, "loss": 0.0311, "step": 451 }, { "epoch": 41.0, "eval_accuracy": 0.9270833333333334, "eval_loss": 0.2298324704170227, "eval_runtime": 1.577, "eval_samples_per_second": 182.629, "eval_steps_per_second": 3.171, "step": 451 }, { "epoch": 41.09, "learning_rate": 9.898989898989899e-06, "loss": 0.0482, "step": 452 }, { "epoch": 41.18, "learning_rate": 9.7979797979798e-06, "loss": 0.0874, "step": 453 }, { "epoch": 41.27, "learning_rate": 9.696969696969698e-06, "loss": 0.0582, "step": 454 }, { "epoch": 41.36, "learning_rate": 9.595959595959595e-06, "loss": 0.0519, "step": 455 }, { "epoch": 41.45, "learning_rate": 9.494949494949495e-06, "loss": 0.1067, "step": 456 }, { "epoch": 41.55, "learning_rate": 9.393939393939394e-06, "loss": 0.0855, "step": 457 }, { "epoch": 41.64, "learning_rate": 9.292929292929294e-06, "loss": 0.0874, "step": 458 }, { "epoch": 41.73, "learning_rate": 9.191919191919192e-06, "loss": 0.0601, "step": 459 }, { "epoch": 41.82, "learning_rate": 9.090909090909091e-06, "loss": 0.0572, "step": 460 }, { "epoch": 41.91, "learning_rate": 8.98989898989899e-06, "loss": 0.0762, "step": 461 }, { "epoch": 42.0, "learning_rate": 8.88888888888889e-06, "loss": 0.0796, "step": 462 }, { "epoch": 42.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.24221903085708618, "eval_runtime": 1.5621, "eval_samples_per_second": 184.366, "eval_steps_per_second": 3.201, "step": 462 }, { "epoch": 42.09, "learning_rate": 8.787878787878788e-06, "loss": 0.0636, "step": 463 }, { "epoch": 42.18, "learning_rate": 8.686868686868687e-06, "loss": 0.0522, "step": 464 }, { "epoch": 42.27, "learning_rate": 8.585858585858587e-06, "loss": 0.0725, "step": 465 }, { "epoch": 42.36, "learning_rate": 8.484848484848486e-06, "loss": 0.0872, "step": 466 }, { "epoch": 42.45, "learning_rate": 8.383838383838384e-06, "loss": 0.0571, "step": 467 }, { "epoch": 42.55, "learning_rate": 8.282828282828283e-06, "loss": 0.0991, "step": 468 }, { "epoch": 42.64, "learning_rate": 8.181818181818183e-06, "loss": 0.0781, "step": 469 }, { "epoch": 42.73, "learning_rate": 8.080808080808082e-06, "loss": 0.0814, "step": 470 }, { "epoch": 42.82, "learning_rate": 7.97979797979798e-06, "loss": 0.071, "step": 471 }, { "epoch": 42.91, "learning_rate": 7.878787878787878e-06, "loss": 0.08, "step": 472 }, { "epoch": 43.0, "learning_rate": 7.777777777777777e-06, "loss": 0.058, "step": 473 }, { "epoch": 43.0, "eval_accuracy": 0.9305555555555556, "eval_loss": 0.2058059573173523, "eval_runtime": 1.5649, "eval_samples_per_second": 184.034, "eval_steps_per_second": 3.195, "step": 473 }, { "epoch": 43.09, "learning_rate": 7.676767676767677e-06, "loss": 0.0719, "step": 474 }, { "epoch": 43.18, "learning_rate": 7.5757575757575764e-06, "loss": 0.0663, "step": 475 }, { "epoch": 43.27, "learning_rate": 7.474747474747475e-06, "loss": 0.0397, "step": 476 }, { "epoch": 43.36, "learning_rate": 7.3737373737373745e-06, "loss": 0.0452, "step": 477 }, { "epoch": 43.45, "learning_rate": 7.272727272727272e-06, "loss": 0.0515, "step": 478 }, { "epoch": 43.55, "learning_rate": 7.171717171717173e-06, "loss": 0.0958, "step": 479 }, { "epoch": 43.64, "learning_rate": 7.0707070707070704e-06, "loss": 0.104, "step": 480 }, { "epoch": 43.73, "learning_rate": 6.969696969696971e-06, "loss": 0.0882, "step": 481 }, { "epoch": 43.82, "learning_rate": 6.8686868686868685e-06, "loss": 0.0734, "step": 482 }, { "epoch": 43.91, "learning_rate": 6.767676767676769e-06, "loss": 0.0448, "step": 483 }, { "epoch": 44.0, "learning_rate": 6.666666666666667e-06, "loss": 0.0853, "step": 484 }, { "epoch": 44.0, "eval_accuracy": 0.9305555555555556, "eval_loss": 0.2266363501548767, "eval_runtime": 1.5654, "eval_samples_per_second": 183.973, "eval_steps_per_second": 3.194, "step": 484 }, { "epoch": 44.09, "learning_rate": 6.565656565656567e-06, "loss": 0.0558, "step": 485 }, { "epoch": 44.18, "learning_rate": 6.464646464646465e-06, "loss": 0.0655, "step": 486 }, { "epoch": 44.27, "learning_rate": 6.363636363636363e-06, "loss": 0.0781, "step": 487 }, { "epoch": 44.36, "learning_rate": 6.262626262626263e-06, "loss": 0.0626, "step": 488 }, { "epoch": 44.45, "learning_rate": 6.161616161616162e-06, "loss": 0.0671, "step": 489 }, { "epoch": 44.55, "learning_rate": 6.060606060606061e-06, "loss": 0.0565, "step": 490 }, { "epoch": 44.64, "learning_rate": 5.9595959595959605e-06, "loss": 0.0517, "step": 491 }, { "epoch": 44.73, "learning_rate": 5.858585858585859e-06, "loss": 0.0785, "step": 492 }, { "epoch": 44.82, "learning_rate": 5.7575757575757586e-06, "loss": 0.0558, "step": 493 }, { "epoch": 44.91, "learning_rate": 5.656565656565657e-06, "loss": 0.0655, "step": 494 }, { "epoch": 45.0, "learning_rate": 5.555555555555556e-06, "loss": 0.0868, "step": 495 }, { "epoch": 45.0, "eval_accuracy": 0.9236111111111112, "eval_loss": 0.22659315168857574, "eval_runtime": 1.5757, "eval_samples_per_second": 182.772, "eval_steps_per_second": 3.173, "step": 495 }, { "epoch": 45.09, "learning_rate": 5.4545454545454545e-06, "loss": 0.0681, "step": 496 }, { "epoch": 45.18, "learning_rate": 5.353535353535354e-06, "loss": 0.0541, "step": 497 }, { "epoch": 45.27, "learning_rate": 5.2525252525252526e-06, "loss": 0.0695, "step": 498 }, { "epoch": 45.36, "learning_rate": 5.151515151515152e-06, "loss": 0.0344, "step": 499 }, { "epoch": 45.45, "learning_rate": 5.050505050505051e-06, "loss": 0.0554, "step": 500 }, { "epoch": 45.55, "learning_rate": 4.949494949494949e-06, "loss": 0.0691, "step": 501 }, { "epoch": 45.64, "learning_rate": 4.848484848484849e-06, "loss": 0.0715, "step": 502 }, { "epoch": 45.73, "learning_rate": 4.747474747474747e-06, "loss": 0.0519, "step": 503 }, { "epoch": 45.82, "learning_rate": 4.646464646464647e-06, "loss": 0.0658, "step": 504 }, { "epoch": 45.91, "learning_rate": 4.5454545454545455e-06, "loss": 0.088, "step": 505 }, { "epoch": 46.0, "learning_rate": 4.444444444444445e-06, "loss": 0.0554, "step": 506 }, { "epoch": 46.0, "eval_accuracy": 0.9270833333333334, "eval_loss": 0.2163047045469284, "eval_runtime": 1.5704, "eval_samples_per_second": 183.395, "eval_steps_per_second": 3.184, "step": 506 }, { "epoch": 46.09, "learning_rate": 4.343434343434344e-06, "loss": 0.0632, "step": 507 }, { "epoch": 46.18, "learning_rate": 4.242424242424243e-06, "loss": 0.0838, "step": 508 }, { "epoch": 46.27, "learning_rate": 4.141414141414142e-06, "loss": 0.0752, "step": 509 }, { "epoch": 46.36, "learning_rate": 4.040404040404041e-06, "loss": 0.0767, "step": 510 }, { "epoch": 46.45, "learning_rate": 3.939393939393939e-06, "loss": 0.0855, "step": 511 }, { "epoch": 46.55, "learning_rate": 3.8383838383838385e-06, "loss": 0.0392, "step": 512 }, { "epoch": 46.64, "learning_rate": 3.7373737373737375e-06, "loss": 0.0681, "step": 513 }, { "epoch": 46.73, "learning_rate": 3.636363636363636e-06, "loss": 0.0551, "step": 514 }, { "epoch": 46.82, "learning_rate": 3.5353535353535352e-06, "loss": 0.0516, "step": 515 }, { "epoch": 46.91, "learning_rate": 3.4343434343434343e-06, "loss": 0.0778, "step": 516 }, { "epoch": 47.0, "learning_rate": 3.3333333333333333e-06, "loss": 0.0508, "step": 517 }, { "epoch": 47.0, "eval_accuracy": 0.9305555555555556, "eval_loss": 0.21035218238830566, "eval_runtime": 1.5655, "eval_samples_per_second": 183.961, "eval_steps_per_second": 3.194, "step": 517 }, { "epoch": 47.09, "learning_rate": 3.2323232323232324e-06, "loss": 0.041, "step": 518 }, { "epoch": 47.18, "learning_rate": 3.1313131313131314e-06, "loss": 0.041, "step": 519 }, { "epoch": 47.27, "learning_rate": 3.0303030303030305e-06, "loss": 0.0813, "step": 520 }, { "epoch": 47.36, "learning_rate": 2.9292929292929295e-06, "loss": 0.0567, "step": 521 }, { "epoch": 47.45, "learning_rate": 2.8282828282828286e-06, "loss": 0.055, "step": 522 }, { "epoch": 47.55, "learning_rate": 2.7272727272727272e-06, "loss": 0.0505, "step": 523 }, { "epoch": 47.64, "learning_rate": 2.6262626262626263e-06, "loss": 0.0239, "step": 524 }, { "epoch": 47.73, "learning_rate": 2.5252525252525253e-06, "loss": 0.0441, "step": 525 }, { "epoch": 47.82, "learning_rate": 2.4242424242424244e-06, "loss": 0.0567, "step": 526 }, { "epoch": 47.91, "learning_rate": 2.3232323232323234e-06, "loss": 0.0711, "step": 527 }, { "epoch": 48.0, "learning_rate": 2.2222222222222225e-06, "loss": 0.0589, "step": 528 }, { "epoch": 48.0, "eval_accuracy": 0.9270833333333334, "eval_loss": 0.21717707812786102, "eval_runtime": 1.5625, "eval_samples_per_second": 184.318, "eval_steps_per_second": 3.2, "step": 528 }, { "epoch": 48.09, "learning_rate": 2.1212121212121216e-06, "loss": 0.0363, "step": 529 }, { "epoch": 48.18, "learning_rate": 2.0202020202020206e-06, "loss": 0.0767, "step": 530 }, { "epoch": 48.27, "learning_rate": 1.9191919191919192e-06, "loss": 0.0835, "step": 531 }, { "epoch": 48.36, "learning_rate": 1.818181818181818e-06, "loss": 0.0769, "step": 532 }, { "epoch": 48.45, "learning_rate": 1.7171717171717171e-06, "loss": 0.0373, "step": 533 }, { "epoch": 48.55, "learning_rate": 1.6161616161616162e-06, "loss": 0.0473, "step": 534 }, { "epoch": 48.64, "learning_rate": 1.5151515151515152e-06, "loss": 0.0439, "step": 535 }, { "epoch": 48.73, "learning_rate": 1.4141414141414143e-06, "loss": 0.0615, "step": 536 }, { "epoch": 48.82, "learning_rate": 1.3131313131313131e-06, "loss": 0.0554, "step": 537 }, { "epoch": 48.91, "learning_rate": 1.2121212121212122e-06, "loss": 0.0669, "step": 538 }, { "epoch": 49.0, "learning_rate": 1.1111111111111112e-06, "loss": 0.0369, "step": 539 }, { "epoch": 49.0, "eval_accuracy": 0.9270833333333334, "eval_loss": 0.22139286994934082, "eval_runtime": 1.5665, "eval_samples_per_second": 183.854, "eval_steps_per_second": 3.192, "step": 539 }, { "epoch": 49.09, "learning_rate": 1.0101010101010103e-06, "loss": 0.043, "step": 540 }, { "epoch": 49.18, "learning_rate": 9.09090909090909e-07, "loss": 0.0441, "step": 541 }, { "epoch": 49.27, "learning_rate": 8.080808080808081e-07, "loss": 0.0507, "step": 542 }, { "epoch": 49.36, "learning_rate": 7.070707070707071e-07, "loss": 0.0612, "step": 543 }, { "epoch": 49.45, "learning_rate": 6.060606060606061e-07, "loss": 0.0719, "step": 544 }, { "epoch": 49.55, "learning_rate": 5.050505050505052e-07, "loss": 0.0757, "step": 545 }, { "epoch": 49.64, "learning_rate": 4.0404040404040405e-07, "loss": 0.0473, "step": 546 }, { "epoch": 49.73, "learning_rate": 3.0303030303030305e-07, "loss": 0.0859, "step": 547 }, { "epoch": 49.82, "learning_rate": 2.0202020202020202e-07, "loss": 0.0462, "step": 548 }, { "epoch": 49.91, "learning_rate": 1.0101010101010101e-07, "loss": 0.0517, "step": 549 }, { "epoch": 50.0, "learning_rate": 0.0, "loss": 0.0852, "step": 550 }, { "epoch": 50.0, "eval_accuracy": 0.9270833333333334, "eval_loss": 0.22411441802978516, "eval_runtime": 1.5744, "eval_samples_per_second": 182.932, "eval_steps_per_second": 3.176, "step": 550 }, { "epoch": 50.0, "step": 550, "total_flos": 1.0035482482400256e+19, "train_loss": 0.24394690781493078, "train_runtime": 2413.5397, "train_samples_per_second": 53.656, "train_steps_per_second": 0.228 } ], "max_steps": 550, "num_train_epochs": 50, "total_flos": 1.0035482482400256e+19, "trial_name": null, "trial_params": null }